-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.py
More file actions
153 lines (152 loc) · 5.49 KB
/
convert.py
File metadata and controls
153 lines (152 loc) · 5.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import csv
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from tabulate import tabulate
def loadnames(filename):
    """Load the contact table used later for fuzzy name matching.

    Every usable row of the CSV file contributes one entry: the key is
    column 1 and column 2 joined by a single space (initial and surname,
    according to the original comment) and the value is column 3 (the
    e-mail address).  Column 0 is ignored.  A later row with the same key
    overwrites an earlier one.

    Rows with fewer than four columns -- including the empty rows that
    ``csv.reader`` yields for blank lines -- are skipped instead of
    aborting the whole load with an IndexError.

    :param filename: path of a comma-separated file quoted with '"'.
    :return: dict mapping "<column 1> <column 2>" to <column 3>.
    """
    conv_table = {}
    # newline='' hands newline handling to the csv module, so quoted fields
    # that contain line breaks are parsed correctly.
    with open(filename, 'rt', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in reader:
            if len(row) < 4:
                continue
            conv_table[row[1] + ' ' + row[2]] = row[3]
    return conv_table
def getbuildings():
    """Return a new list of the known building names.

    The conversion script uses these strings as the candidate set when it
    fuzzy-matches an equipment row's location.  'SMC' is listed in addition
    to 'Scottish Microelectronics Centre' (the script maps an 'SMC' match
    back to the full name), and 'Unknown' is the value the script
    substitutes for an empty location.
    """
    # NOTE(review): 'James Clark Maxwell Building' may be a misspelling of
    # "Clerk"; left untouched because the string is written to the output
    # file -- confirm against the target system before changing it.
    known_buildings = (
        'Alexander Graham Bell building',
        'Alrick Building',
        'BioSpace',
        'Eng Structures Lab',
        'Erskine Williamson Building',
        'Faraday Building',
        'Fleeming Jenkin',
        'Flowave',
        'Hudson Beare Building',
        'James Clark Maxwell Building',
        'John Muir',
        'Mary Bruck',
        'Michael Swann Building',
        'Peter Wilson Building',
        'Sanderson Building',
        'Scottish Microelectronics Centre',
        'SMC',
        'Technology Transfer Centre',
        'William Rankine Building',
        'Unknown',
    )
    # Callers get their own list object on every call.
    return list(known_buildings)
def getinstitutes():
    """Return a new dict mapping each institute's full name to its acronym."""
    name_acronym_pairs = (
        ('Institute for Bioengineering', 'IBIO'),
        ('Institute for Digital Communications', 'IDCOM'),
        ('Institute for Energy Systems', 'IES'),
        ('Institute for Infrastructure and Environment', 'IIE'),
        ('Institute for Integrated Micro and Nano Systems', 'INMS'),
        ('Institute for Materials and Processes', 'IMP'),
    )
    return dict(name_acronym_pairs)
def flatten_dict(conv_table):
    """Return the keys of *conv_table* as a new list, in iteration order.

    :param conv_table: mapping whose keys are wanted (the values are ignored).
    :return: list of the keys; an empty mapping gives an empty list.
    """
    # Iterating a mapping yields its keys, so no explicit loop over
    # .items() with an unused value is needed.
    return list(conv_table)
# ---------------------------------------------------------------------------
# Conversion script (executed at module level).
#
# Reads 'equipment.csv' row by row, copies selected columns into a 50-column
# output row, fuzzy-matches the building, the two contacts and the institute,
# shows the contact/building matches to the operator for confirmation, and
# writes the result to 'newconvert.csv'.
# ---------------------------------------------------------------------------
names = loadnames('data.csv')
flat_names = flatten_dict(names)
buildings = getbuildings()
# Loop-invariant lookup table: built once instead of once per input row.
institutes = getinstitutes()

# newline='' is what the csv module asks for on files passed to csv.reader /
# csv.writer; without it the writer emits blank lines between rows on Windows.
with open('newconvert.csv', 'w', newline='') as outputfile, \
        open('equipment.csv', 'rt', newline='') as inputfile:
    admin_name = 'Admin Account'
    admin_mail = 'fake@example.com'
    spamreader = csv.reader(inputfile, delimiter=',', quotechar='"')
    spamwriter = csv.writer(outputfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
    for row in spamreader:
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to
            # convert and indexing it would raise IndexError.
            continue

        outlist = [None] * 50
        outlist[1] = row[3]    # Manufacturer
        outlist[2] = row[4]    # Model
        outlist[4] = row[2]    # Description
        outlist[12] = row[14]  # availability
        outlist[13] = row[20]  # restrictions
        outlist[14] = row[17]  # usergroup
        outlist[15] = row[19]  # Access
        outlist[17] = row[1]   # Category
        # outlist[18] (Institute) is filled in by the fuzzy match below.
        outlist[19] = "KB"     # Site/Campus
        # outlist[20] (Building) is filled in by the fuzzy match below.
        outlist[21] = row[11]  # Room (raw location from input)
        outlist[27] = row[6]   # Manufacturer website
        outlist[29] = row[18]  # training_required?
        outlist[37] = row[7]   # Asset ID
        outlist[38] = row[28]  # Finance ID
        outlist[39] = row[5]   # Serial No
        outlist[42] = row[24]  # Date of Purchase
        outlist[43] = row[25]  # Purchase_cost
        outlist[45] = row[23]  # end_of_life
        outlist[46] = row[22]  # maintenance
        outlist[49] = row[8]   # comments

        # Fuzzy match building.  An empty location is replaced by 'Unknown'
        # (after the raw value was stored in outlist[21] above), so the
        # comment and the confirmation table below also show 'Unknown'.
        if not row[11]:
            row[11] = 'Unknown'
        building_matches = process.extractBests(row[11], buildings, limit=2)
        if building_matches[0][0] == "SMC":
            # 'SMC' is only an alias in the candidate list.
            outlist[20] = 'Scottish Microelectronics Centre'
        else:
            outlist[20] = building_matches[0][0]

        # Fuzzy match names and emails.
        # Contact 1:
        owner_matches = process.extractBests(
            row[12], flat_names, limit=2, scorer=fuzz.token_set_ratio)
        outlist[22] = owner_matches[0][0]         # contact 1 name
        outlist[23] = names[owner_matches[0][0]]  # contact 1 email
        # Contact 2 is optional.  The match list is reset for every row so
        # that a row without a technical contact can neither hit an undefined
        # name nor reuse the matches of a previous row.
        technical_matches = []
        if row[13]:
            technical_matches = process.extractBests(
                row[13], flat_names, limit=2, scorer=fuzz.token_set_ratio)
            outlist[24] = technical_matches[0][0]
            outlist[25] = names[technical_matches[0][0]]
        else:
            outlist[24] = ''
            outlist[25] = ''

        # Keep the raw input values in the comments column for later audit.
        comment = ("|Building: " + row[11] + "\n"
                   + "|Owner: " + row[12] + "\n"
                   + "|Technical: " + row[13] + "\n")
        outlist[49] += comment

        # If manufacturer and model are not given, use the description as
        # title.
        if not outlist[1] and not outlist[2]:
            outlist[0] = row[2]

        if row[10]:
            # With a dict of choices fuzzywuzzy scores the query against the
            # dict values (the acronyms) and returns (value, score, key)
            # triples, so index 2 is the full institute name.
            institute_matches = process.extract(row[10], institutes, limit=2)
            outlist[18] = institute_matches[0][2]
        else:
            outlist[18] = 'School of Engineering'

        # Show data, prompt for corrections: first table line is the raw
        # input, second line is what was matched.
        row1 = [row[12], row[11], row[13]]
        row2 = [outlist[22], outlist[20], outlist[24]]
        print(tabulate([row1, row2]))
        print(owner_matches, technical_matches)
        ans = input('is everything correct?')

        # The answer is read as up to two characters:
        #   1st (contact 1): 's' = second-best match, 'a' = admin account
        #   2nd (contact 2): 's' = second-best match, 'a' = admin account,
        #                    'e' = leave empty
        # 'y', any other character, or a missing character keeps the best
        # match.  Slicing (instead of indexing) makes an empty answer safe.
        owner_choice = ans[:1]
        technical_choice = ans[1:2]

        if owner_choice == 's':
            if len(owner_matches) > 1:
                outlist[22] = owner_matches[1][0]
                outlist[23] = names[owner_matches[1][0]]
            else:
                print('no second match for contact 1, keeping current value')
        elif owner_choice == 'a':
            outlist[22] = admin_name
            outlist[23] = admin_mail

        if technical_choice == 's':
            if len(technical_matches) > 1:
                outlist[24] = technical_matches[1][0]
                outlist[25] = names[technical_matches[1][0]]
            else:
                print('no second match for contact 2, keeping current value')
        elif technical_choice == 'a':
            outlist[24] = admin_name
            outlist[25] = admin_mail
        elif technical_choice == 'e':
            outlist[24] = ''
            outlist[25] = ''

        print('\n\n\n')
        # Write out the data
        spamwriter.writerow(outlist)