gem5  v21.0.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FaultModel.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Massachusetts Institute of Technology
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met: redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer;
9  * redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution;
12  * neither the name of the copyright holders nor the names of its
13  * contributors may be used to endorse or promote products derived from
14  * this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
31  *
32  * If you use our tool for academic research, we request that you cite:
33  * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
34  * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
35  * Proceedings of the 48th Design Automation Conference (DAC'11)
36  */
37 
38 // C++ includes
39 #include <cassert>
40 #include <fstream>
41 #include <iostream>
42 #include <vector>
43 
44 // GEM5 includes
45 #include "FaultModel.hh"
46 #include "base/logging.hh"
47 
48 #define MAX(a,b) ((a > b) ? (a) : (b))
49 
50 
52 {
53  // read configurations into "configurations" vector
54  // format: <buff/vc> <vcs> <10 fault types>
55  bool more_records = true;
56  for (int i = 0; more_records; i += (fields_per_conf_record)){
57  system_conf configuration;
58  configuration.buff_per_vc =
59  p.baseline_fault_vector_database[i + conf_record_buff_per_vc];
60  configuration.vcs =
61  p.baseline_fault_vector_database[i + conf_record_vcs];
62  for (int fault_index = 0; fault_index < number_of_fault_types;
63  fault_index++){
64  configuration.fault_type[fault_index] =
65  p.baseline_fault_vector_database[i +
66  conf_record_first_fault_type + fault_index] / 100;
67  }
68  configurations.push_back(configuration);
69  if (p.baseline_fault_vector_database[i+fields_per_conf_record] < 0){
70  more_records = false;
71  }
72  }
73 
74  // read temperature weights into "temperature_weights" vector
75  // format: <temperature> <weight>
76  more_records = true;
77  for (int i = 0; more_records; i += (fields_per_temperature_record)){
78  int record_temperature =
79  p.temperature_weights_database[i + temperature_record_temp];
80  int record_weight =
81  p.temperature_weights_database[i + temperature_record_weight];
82  static int first_record = true;
83  if (first_record){
84  for (int temperature = 0; temperature < record_temperature;
85  temperature++){
86  temperature_weights.push_back(0);
87  }
88  first_record = false;
89  }
90  assert(record_temperature == temperature_weights.size());
91  temperature_weights.push_back(record_weight);
92  if (p.temperature_weights_database[i +
94  more_records = false;
95  }
96  }
97 }
98 
99 std::string
101 {
102  if (ft == data_corruption__few_bits){
103  return "data_corruption__few_bits";
104  } else if (ft == data_corruption__all_bits){
105  return "data_corruption__all_bits";
106  } else if (ft == flit_conservation__flit_duplication){
107  return "flit_conservation__flit_duplication";
108  } else if (ft == flit_conservation__flit_loss_or_split){
109  return "flit_conservation__flit_loss_or_split";
110  } else if (ft == misrouting){
111  return "misrouting";
112  } else if (ft == credit_conservation__credit_generation){
113  return "credit_conservation__credit_generation";
114  } else if (ft == credit_conservation__credit_loss){
115  return "credit_conservation__credit_loss";
116  } else if (ft == erroneous_allocation__VC){
117  return "erroneous_allocation__VC";
118  } else if (ft == erroneous_allocation__switch){
119  return "erroneous_allocation__switch";
120  } else if (ft == unfair_arbitration){
121  return "unfair_arbitration";
122  } else if (ft == number_of_fault_types){
123  return "none";
124  } else {
125  return "none";
126  }
127 }
128 
129 
130 int
131 FaultModel::declare_router(int number_of_inputs,
132  int number_of_outputs,
133  int number_of_vcs_per_input,
134  int number_of_buff_per_data_vc,
135  int number_of_buff_per_ctrl_vc)
136 {
137  // check inputs (are they legal?)
138  if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
139  number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
140  number_of_buff_per_ctrl_vc <= 0){
141  fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
142  }
143  int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
144  number_of_buff_per_ctrl_vc);
145  int total_vcs = number_of_inputs * number_of_vcs_per_input;
146  if (total_vcs > MAX_VCs){
147  fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
148  }
149  if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
150  fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
151  }
152 
153  // link the router to a DB record
154  int record_hit = -1;
155  for (int record = 0; record < configurations.size(); record++){
156  if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
157  (configurations[record].vcs == total_vcs)){
158  record_hit = record;
159  }
160  }
161  if (record_hit == -1){
162  panic("Fault Model: ERROR! configuration not found in DB. BUG?");
163  }
164 
165  // remember the router and return its ID
166  routers.push_back(configurations[record_hit]);
167  static int router_index = 0;
168  return router_index++;
169 }
170 
171 bool
173  int temperature_input,
174  float fault_vector[])
175 {
176  bool ok = true;
177 
178  // is the routerID recorded?
179  if (routerID < 0 || routerID >= ((int) routers.size())){
180  warn("Fault Model: ERROR! unknown router ID argument.");
181  fatal("Fault Model: Did you enable the fault model flag)?");
182  }
183 
184  // is the temperature too high/too low?
185  int temperature = temperature_input;
186  if (temperature_input >= ((int) temperature_weights.size())){
187  ok = false;
188  warn_once("Fault Model: Temperature exceeded simulated upper bound.");
189  warn_once("Fault Model: The fault model is not accurate any more.");
190  temperature = (temperature_weights.size() - 1);
191  } else if (temperature_input < 0){
192  ok = false;
193  warn_once("Fault Model: Temperature exceeded simulated lower bound.");
194  warn_once("Fault Model: The fault model is not accurate any more.");
195  temperature = 0;
196  }
197 
198  // recover the router record and return its fault vector
199  for (int i = 0; i < number_of_fault_types; i++){
200  fault_vector[i] = routers[routerID].fault_type[i] *
201  ((float)temperature_weights[temperature]);
202  }
203  return ok;
204 }
205 
206 bool
208  int temperature_input,
209  float *aggregate_fault_prob)
210 {
211  *aggregate_fault_prob = 1.0;
212  bool ok = true;
213 
214  // is the routerID recorded?
215  if (routerID < 0 || routerID >= ((int) routers.size())){
216  warn("Fault Model: ERROR! unknown router ID argument.");
217  fatal("Fault Model: Did you enable the fault model flag)?");
218  }
219 
220  // is the temperature too high/too low?
221  int temperature = temperature_input;
222  if (temperature_input >= ((int) temperature_weights.size()) ){
223  ok = false;
224  warn_once("Fault Model: Temperature exceeded simulated upper bound.");
225  warn_once("Fault Model: The fault model is not accurate any more.");
226  temperature = (temperature_weights.size()-1);
227  } else if (temperature_input < 0){
228  ok = false;
229  warn_once("Fault Model: Temperature exceeded simulated lower bound.");
230  warn_once("Fault Model: The fault model is not accurate any more.");
231  temperature = 0;
232  }
233 
234  // recover the router record and return its aggregate fault probability
235  for (int i = 0; i < number_of_fault_types; i++){
236  *aggregate_fault_prob= *aggregate_fault_prob *
237  ( 1.0 - (routers[routerID].fault_type[i] *
238  ((float)temperature_weights[temperature])) );
239  }
240  *aggregate_fault_prob = 1.0 - *aggregate_fault_prob;
241  return ok;
242 }
243 
244 // this function is used only for debugging purposes
245 void
247 {
248  std::cout << "--- PRINTING configurations ---\n";
249  for (int record = 0; record < configurations.size(); record++){
250  std::cout << "(" << record << ") ";
251  std::cout << "VCs=" << configurations[record].vcs << " ";
252  std::cout << "Buff/VC=" << configurations[record].buff_per_vc << " [";
253  for (int fault_type_num = 0;
254  fault_type_num < number_of_fault_types;
255  fault_type_num++){
256  std::cout <<
257  (100 * configurations[record].fault_type[fault_type_num]);
258  std::cout << "% ";
259  }
260  std::cout << "]\n";
261  }
262  std::cout << "--- PRINTING temperature weights ---\n";
263  for (int record = 0; record < temperature_weights.size(); record++){
264  std::cout << "temperature=" << record << " => ";
265  std::cout << "weight=" << temperature_weights[record];
266  std::cout << "\n";
267  }
268 }
FaultModel::conf_record_vcs
@ conf_record_vcs
Definition: FaultModel.hh:85
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
FaultModel::temperature_weights
std::vector< int > temperature_weights
Definition: FaultModel.hh:135
FaultModel::fields_per_conf_record
@ fields_per_conf_record
Definition: FaultModel.hh:88
warn
#define warn(...)
Definition: logging.hh:239
FaultModel::Params
FaultModelParams Params
Definition: FaultModel.hh:56
FaultModel::data_corruption__few_bits
@ data_corruption__few_bits
Definition: FaultModel.hh:65
FaultModel::number_of_fault_types
@ number_of_fault_types
Definition: FaultModel.hh:75
FaultModel::credit_conservation__credit_generation
@ credit_conservation__credit_generation
Definition: FaultModel.hh:70
FaultModel::flit_conservation__flit_loss_or_split
@ flit_conservation__flit_loss_or_split
Definition: FaultModel.hh:68
warn_once
#define warn_once(...)
Definition: logging.hh:243
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
FaultModel::fault_vector
bool fault_vector(int routerID, int temperature, float fault_vector[])
Definition: FaultModel.cc:172
MAX_VCs
#define MAX_VCs
Definition: FaultModel.hh:42
FaultModel::erroneous_allocation__VC
@ erroneous_allocation__VC
Definition: FaultModel.hh:72
FaultModel::routers
std::vector< system_conf > routers
Definition: FaultModel.hh:134
FaultModel::temperature_record_temp
@ temperature_record_temp
Definition: FaultModel.hh:93
FaultModel::data_corruption__all_bits
@ data_corruption__all_bits
Definition: FaultModel.hh:66
FaultModel::misrouting
@ misrouting
Definition: FaultModel.hh:69
FaultModel::configurations
std::vector< system_conf > configurations
Definition: FaultModel.hh:133
FaultModel::temperature_record_weight
@ temperature_record_weight
Definition: FaultModel.hh:94
FaultModel::system_conf::fault_type
float fault_type[number_of_fault_types]
Definition: FaultModel.hh:102
FaultModel::flit_conservation__flit_duplication
@ flit_conservation__flit_duplication
Definition: FaultModel.hh:67
FaultModel::fault_type_to_string
std::string fault_type_to_string(int fault_type_index)
Definition: FaultModel.cc:100
FaultModel::unfair_arbitration
@ unfair_arbitration
Definition: FaultModel.hh:74
FaultModel::system_conf
Definition: FaultModel.hh:98
MAX
#define MAX(a, b)
Definition: FaultModel.cc:48
FaultModel.hh
FaultModel::declare_router
int declare_router(int number_of_inputs, int number_of_outputs, int number_of_vcs_per_vnet, int number_of_buff_per_data_vc, int number_of_buff_per_ctrl_vc)
Definition: FaultModel.cc:131
FaultModel::conf_record_first_fault_type
@ conf_record_first_fault_type
Definition: FaultModel.hh:86
FaultModel::FaultModel
FaultModel(const Params &p)
Definition: FaultModel.cc:51
FaultModel::system_conf::buff_per_vc
int buff_per_vc
Definition: FaultModel.hh:101
FaultModel::erroneous_allocation__switch
@ erroneous_allocation__switch
Definition: FaultModel.hh:73
FaultModel::print
void print(void)
Definition: FaultModel.cc:246
logging.hh
FaultModel::fault_prob
bool fault_prob(int routerID, int temperature, float *aggregate_fault_prob)
Definition: FaultModel.cc:207
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
FaultModel::system_conf::vcs
int vcs
Definition: FaultModel.hh:100
FaultModel::conf_record_buff_per_vc
@ conf_record_buff_per_vc
Definition: FaultModel.hh:84
FaultModel::fields_per_temperature_record
@ fields_per_temperature_record
Definition: FaultModel.hh:95
MAX_BUFFERS_per_VC
#define MAX_BUFFERS_per_VC
Definition: FaultModel.hh:43
FaultModel::credit_conservation__credit_loss
@ credit_conservation__credit_loss
Definition: FaultModel.hh:71
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:141

Generated on Tue Jun 22 2021 15:28:29 for gem5 by doxygen 1.8.17