| 1 |
02482b9a
|
Alessandro_N
|
|
| 2 |
|
|
|
| 3 |
|
|
|
| 4 |
|
|
|
| 5 |
|
|
|
| 6 |
|
|
|
| 7 |
|
|
|
| 8 |
|
|
|
| 9 |
|
|
|
| 10 |
|
|
|
| 11 |
|
|
|
| 12 |
|
|
|
| 13 |
|
|
|
| 14 |
|
|
|
| 15 |
|
|
|
| 16 |
|
|
|
| 17 |
|
|
|
| 18 |
|
|
|
| 19 |
|
|
|
| 20 |
|
|
|
| 21 |
|
|
'''
This is a script aimed at monitoring the status of the Sitools2 datasets
and the mapping of their fields.

Put the script in the Sitools2 folder data/datasets (or data/datasets/map, for the latest versions),
where the dataset information is stored as XML files.
Then, read and record the current status/mapping with:

    $ datasets_Monitoring --record

The dataset properties are stored locally in files named:

    <dataset_name>.lastStatus.xml

The '--record' option should be executed manually by the administrator every time
one or more datasets are modified (it would be good to add a reminder to the
Sitools2 pop-up message).

To check the datasets mapping/status, launch the script with the '--check' option:

    $ datasets_Monitoring --check

This performs a consistency check between the current (int@*.xml or map/string@*.xml)
and the last recorded status (<dataset_name>.lastStatus.xml).
If any inconsistency is found, an alert e-mail is sent.

The '--check' option should be run daily and automatically by the system.

@author: Alessandro NASTASI for IAS -IDOC
@date: 27/04/2015
'''
|
| 52 |
|
|
|
| 53 |
|
|
# Module metadata (authorship, licence and release information).
__author__ = "Alessandro Nastasi"
__credits__ = ["Alessandro Nastasi", "Herve' Ballans"]
__license__ = "GPL"
__version__ = "1.0"
__date__ = "27/04/2015"
|
| 58 |
|
|
|
| 59 |
|
|
import glob
import os
import smtplib
import sys
import time
import xml.etree.ElementTree as ET
from datetime import date
from email.mime.text import MIMEText
from xml.sax.saxutils import escape
|
| 64 |
|
|
|
| 65 |
|
|
# Glob pattern of the dataset description files that are recorded/checked.
sitools2_xml_filenames = "string@*.xml"
# Directory containing those files; the <dataset_name>.lastStatus.xml
# snapshots and the monitoring error log are written here as well.
file_path = '/usr/local/Sitools2_SZ_Cluster_DB/data/datasets/map/'
|
| 67 |
|
|
|
| 68 |
|
|
class bcolors:
    """ANSI escape sequences used to colour the console output."""

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    # Resets the terminal attributes after a coloured fragment.
    ENDC = '\033[0m'
|
| 75 |
|
|
|
| 76 |
|
|
# Messages shown on the console and in the alert e-mail, keyed by error code:
# 1 = status mismatch, 2 = mapping mismatch, 3 = hint to re-run '--record'.
_ERROR_CODE = {
    1: 'Status inconsistency found:',
    2: 'Mapping inconsistency found:',
    3: "The *.lastStatus.xml files were probably not updated: re-run the script with '--record' option.",
}
|
| 81 |
|
|
|
| 82 |
|
|
def create_id_alias_dict(xml_root):
    """Map each column id to its alias for the given dataset XML root.

    Only ``<column>`` elements that are direct children of ``xml_root`` are
    inspected. For every one of them the text of its ``<id>`` child becomes
    a key and the text of its ``<columnAlias>`` child the associated value.

    A column without an ``<id>`` child is skipped, and a column without a
    ``<columnAlias>`` child is mapped to ``None``, instead of aborting the
    whole run with an AttributeError.

    :param xml_root: ElementTree element holding the ``<column>`` children.
    :return: dict ``{column id: column alias}``.
    """
    id_alias_dict = {}
    for col in xml_root.findall('column'):
        id_node = col.find('id')
        if id_node is None:
            # Nothing to key the alias on: ignore this malformed column.
            continue
        alias_node = col.find('columnAlias')
        id_alias_dict[id_node.text] = alias_node.text if alias_node is not None else None

    return id_alias_dict
|
| 91 |
|
|
|
| 92 |
|
|
|
| 93 |
|
|
def send_alert_mail(body):
    """Send ``body`` as an HTML alert e-mail through the SMTP server.

    The message is built as a single MIME document: the From/To/Subject
    headers are set on the MIMEText object itself. Prepending hand-written
    headers to ``MIMEText.as_string()`` would push the MIME headers below
    the blank separator line, i.e. into the visible message body.

    :param body: HTML text of the alert.
    :raises: whatever smtplib raises on connection or delivery failure
        (nothing is caught here).
    """
    smtp_server = 'smtp.ias.u-psud.fr'
    smtp_port = 25

    sender = 'sitools2.notifier@ias.u-psud.fr'
    recipient = 'sitools2@ias.u-psud.fr'
    subject = '[Sitools2 - SZDB] Datasets status ALERT'

    message = MIMEText(body, 'html')
    message['From'] = sender
    message['To'] = recipient
    message['Subject'] = subject

    session = smtplib.SMTP(smtp_server, smtp_port)
    try:
        session.sendmail(sender, [recipient], message.as_string())
    finally:
        # Always close the connection, even when the delivery fails.
        session.quit()
|
| 119 |
|
|
|
| 120 |
|
|
def record_status():
    """Record the current status and mapping of every dataset.

    For each file matching ``file_path + sitools2_xml_filenames`` the
    dataset name, its status, and the ``<columnId>``/``<conceptId>``
    children of the root are read, and a snapshot is written to
    ``file_path + <dataset name> + '.lastStatus.xml'``.

    The snapshot is fully assembled in memory before the target file is
    opened, so a malformed dataset file cannot leave a truncated snapshot
    behind. Text values are XML-escaped so that the snapshot can be parsed
    again later.

    :raises: parsing errors from ElementTree, AttributeError when the
        ``<name>`` or ``<status>`` element is missing, KeyError when a
        mapped column id has no matching ``<column>`` entry, IndexError when
        there are fewer ``<conceptId>`` than ``<columnId>`` elements.
    """
    print("\n> Recording current datasets properties ...\n")

    # glob avoids spawning a shell; sorted() keeps a stable, alphabetical order.
    for xml_path in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        root = ET.parse(xml_path).getroot()

        dataset_name = root.find('name').text
        fileDataset = file_path + dataset_name + '.lastStatus.xml'
        status = root.find('status').text

        # Column id -> column alias, used to store a readable name.
        column = create_id_alias_dict(root)

        mapped_concept_ids = [elem.text for elem in root.findall('conceptId')]
        mapped_column_ids = [elem.text for elem in root.findall('columnId')]

        today = date.today().strftime("%A %d. %B %Y")
        lines = [
            "<!--File recorded on " + str(today) + "-->\n",
            "<dataset>\n",
            "<!--Last Dataset status:-->\n",
            " <lastStatus>" + escape(status) + "</lastStatus>",
            "\n<!--Last mapping:-->\n <!--mappedColumns-->\n",
            " <totNum>" + str(len(mapped_concept_ids)) + "</totNum>\n",
        ]
        for i, column_id in enumerate(mapped_column_ids):
            lines.append(" <columnId>" + escape(str(column_id)) + "</columnId>\n")
            lines.append(" <columnAlias>" + escape(str(column[column_id])) + "</columnAlias>\n")
            lines.append(" <conceptId>" + escape(str(mapped_concept_ids[i])) + "</conceptId>\n")
        lines.append(" <!--/mappedColumns-->\n</dataset>")

        with open(fileDataset, 'w') as fileOut:
            fileOut.writelines(lines)

        print(" - Current status and mapping of %s written in %s\n" % (dataset_name, fileDataset))
|
| 178 |
|
|
|
| 179 |
|
|
def check_status():
    """Compare every dataset with its last recorded snapshot.

    For each file matching ``file_path + sitools2_xml_filenames`` the
    current ``<status>`` and the ``<columnId>`` children of the root are
    compared with the content of ``<dataset name>.lastStatus.xml``. The
    outcome is printed; if any inconsistency is found an alert e-mail is
    sent through ``send_alert_mail`` and the same HTML report is appended to
    ``file_path + 'monitoring_errors.log.html'``.

    Severity rules, as implemented below:
      * status changed to 'INACTIVE'        -> failure;
      * status changed to anything else     -> warning;
      * columns mapped now but not recorded -> warning;
      * recorded columns no longer mapped   -> failure.

    :raises: parsing/IO errors from ElementTree (including a missing
        snapshot file), AttributeError when an expected element is
        missing, and whatever ``send_alert_mail`` raises.
    """
    today = date.today().strftime("%A %d. %B %Y")
    now = time.strftime("%H:%M:%S")
    check_datime = today + ' at ' + now
    print("\n#\n#Last check done on " + check_datime + " \n#")
    print("\n> Checking datasets properties ...")

    error_status, error_mapping, warning = False, False, False
    email_body = "<br></br><i>Outcome of the datasets check done on %s</i>" % check_datime

    # glob avoids spawning a shell; sorted() keeps a stable, alphabetical order.
    for xml_path in sorted(glob.glob(file_path + sitools2_xml_filenames)):
        email_body += "\n"
        currentRoot = ET.parse(xml_path).getroot()

        dataset_name = currentRoot.find('name').text
        filename_dataset = file_path + dataset_name + '.lastStatus.xml'
        print("\n -  " + dataset_name)

        lastRoot = ET.parse(filename_dataset).getroot()

        # ---- Status check -------------------------------------------------
        lastStatus = lastRoot.find('lastStatus').text
        currentStatus = currentRoot.find('status').text
        showStatus = ""
        output_message = ""

        if lastStatus != currentStatus:
            if currentStatus == 'INACTIVE':
                colour = bcolors.FAIL
                email_title = "*** " + _ERROR_CODE[1] + " ***"
            else:
                warning = True
                colour = bcolors.WARNING
                email_title = _ERROR_CODE[1]

            output_check = colour + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + colour + _ERROR_CODE[1] + bcolors.ENDC
            output_message += ("\n Current status: " + colour + currentStatus + bcolors.ENDC
                               + " - last status: " + colour + lastStatus + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>" + email_title
                           + "</b><p>Current status: <b>" + currentStatus
                           + "</b> , last status: <b>" + lastStatus + "</b>.")
            error_status = True
        else:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
            showStatus = " - " + currentStatus

        print(' {0:20s} {1:7s}'.format('Status check ...', output_check) + showStatus + output_message)

        # ---- Mapping check ------------------------------------------------
        last_mapped_ids = [elem.text for elem in lastRoot.findall('columnId')]
        current_mapped_ids = [elem.text for elem in currentRoot.findall('columnId')]

        # Column id -> alias of the *current* dataset. A recorded column may
        # have disappeared from it altogether, so fall back to the raw id
        # rather than failing with a KeyError.
        column = create_id_alias_dict(currentRoot)

        # Recorded in the snapshot but not mapped anymore.
        missing_current_columnAlias = [column.get(column_id, column_id)
                                       for column_id in last_mapped_ids
                                       if column_id not in current_mapped_ids]
        # Mapped now but absent from the snapshot.
        missing_last_columnAlias = [column.get(column_id, column_id)
                                    for column_id in current_mapped_ids
                                    if column_id not in last_mapped_ids]

        output_message = ""

        if not missing_current_columnAlias and not missing_last_columnAlias:
            output_check = bcolors.OKGREEN + "[OK]" + bcolors.ENDC
        elif missing_last_columnAlias:
            # NOTE(review): when columns were both added and removed, only the
            # added ones are reported here and the case counts as a warning.
            warning = True
            output_check = bcolors.WARNING + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + bcolors.WARNING + _ERROR_CODE[2] + bcolors.ENDC
            output_message += ("\n Some columns are mapped in the new version, but not in the last one: "
                               + bcolors.WARNING + str(missing_last_columnAlias) + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>" + _ERROR_CODE[2]
                           + "</b><p> Some columns are mapped in the new version, but not in the last one: "
                           + str(missing_last_columnAlias) + "</p>")
        else:
            # Only removals: some recorded columns are not mapped anymore.
            output_check = bcolors.FAIL + "[FAIL]" + bcolors.ENDC
            output_message = "\n " + bcolors.FAIL + _ERROR_CODE[2] + bcolors.ENDC
            output_message += ("\n The following column(s) is(are) not mapped anymore: "
                               + bcolors.FAIL + str(missing_current_columnAlias) + bcolors.ENDC)
            email_body += ("<h3>" + dataset_name + "</h3><b>*** " + _ERROR_CODE[2]
                           + " ***</b><p> The following column(s) is(are) not mapped anymore: <b>"
                           + str(missing_current_columnAlias) + "</b></p>")
            error_mapping = True

        print(' {0:20s} {1:7s}'.format('Mapping check ...', output_check) + output_message)

    print("\n__________________________________________________\n\n> Outcome of the check process:")
    email_body += "<p>__________________________________________________</p><p>The check process has produced the following message:"

    if error_status or error_mapping or warning:
        if warning:
            print("\n " + bcolors.WARNING + _ERROR_CODE[3] + bcolors.ENDC)
            email_body += "<p></p><i><h4>" + _ERROR_CODE[3] + '</h4></i></p>'
        else:
            print(bcolors.FAIL + '\n Unexpected errors have been found! Please check the datasets properties in Sitools2.' + bcolors.ENDC)
            email_body += "<p></p><h4>Unexpected errors have been found! Please check the datasets properties in Sitools2.</h4></br></p>"

        send_alert_mail(email_body)
        print("\n\t>> !! ALERT E-MAIL SENT !! <<\n")

        # Keep a local copy of every alert that has been sent.
        errors_log_name = file_path + 'monitoring_errors.log.html'
        with open(errors_log_name, 'a') as errors_log_file:
            errors_log_file.write(email_body)
    else:
        print(bcolors.OKGREEN + '\n No errors/inconsistencies found.\n' + bcolors.ENDC)
|
| 301 |
|
|
|
| 302 |
|
|
|
| 303 |
|
|
|
| 304 |
|
|
def _print_usage():
    """Print the command-line help of the script."""
    print(bcolors.WARNING + "\n> Sintax:\t$ python dataset_Monitoring.py [OPTION]\n" + bcolors.ENDC)
    print("Options:\n\n --check\n\tA status consistency check is performed between the current and last recorded datasets properties.")
    print("\n --record\n\tThe current datasets properties (status and mapping) are recorded. NB. This procedure will overwrite the previously recorded entries.\n")


# Command-line dispatch: '--check' compares, '--record' (after confirmation)
# overwrites the recorded snapshots, anything else shows the help.
if len(sys.argv) > 1:
    option = sys.argv[1]
    if option == '--check':
        check_status()
    elif option == '--record':
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3
        overwrite = ask(bcolors.WARNING + "\n This option will overwrite the current recorded settings. Do you really want to proceed?: " + bcolors.ENDC)
        if overwrite in ["Yes", "yes", "Y", "y", "oui", "OUI", "Oui"]:
            record_status()
        else:
            # Any answer that is not an explicit "yes" leaves the snapshots untouched.
            print("Aborted.\n")
            sys.exit(0)
    else:
        # An unknown option used to be ignored silently; report it as a usage error.
        _print_usage()
        sys.exit(2)
else:
    _print_usage()
    sys.exit(0)