blob: a4bd35dd2b2beddaa076af0e00d6dcb332937e15 [file] [log] [blame]
Michael Lando451a3402017-02-19 10:28:42 +02001import json
2import sys, getopt
3from collections import OrderedDict
4
# NOTE(review): this name shadows the builtin `dict` type for the rest of the
# module, so nothing below can call dict(...) and get the builtin.
dict = {}
# NOTE(review): the name looks like a misspelling of "duplicateUid".
dupliacteUid = {}
# debug() prints only when debugFlag is enabled; swap the two lines below to
# turn debug output on.
#debugFlag = True
debugFlag = False
9
def join_strings(lst):
    """Concatenate the non-None items of *lst*, each followed by one space.

    Items that are already text are appended unchanged; anything else (ints,
    floats, bools, lists, dicts, ...) is converted with ``str()`` first.
    ``None`` items are skipped.

    Args:
        lst: iterable of values to render.

    Returns:
        The concatenated text. Every rendered item is followed by a single
        space, so a non-empty result ends with a trailing space. The result
        is ``""`` when there is nothing to render.
    """
    pieces = []
    for item in lst:
        if item is None:
            continue
        try:
            # Text concatenates directly; on Python 2 this also keeps unicode
            # values intact instead of forcing them through str().
            piece = item + " "
        except TypeError:
            # Not text. Previously only exact ints were converted and every
            # other non-text value raised TypeError here.
            piece = str(item) + " "
        pieces.append(piece)
    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)
18
def debug(desc, *args):
    """Print *desc* followed by the joined *args* when debugging is enabled.

    Nothing is printed unless the module-level ``debugFlag`` is truthy. The
    extra arguments are rendered by ``join_strings``.

    Args:
        desc: leading value of the output line (printed via ``%s``).
        *args: further values to append to the line.
    """
    if debugFlag:
        # One pre-formatted argument in parentheses is read the same way by a
        # Python 2 print statement and by the Python 3 print() function.
        print("%s %s" % (desc, join_strings(args)))
23
def log(desc, arg):
    """Print *desc* and *arg* on one line, separated by a single space.

    Args:
        desc: label for the line.
        arg: value to report; rendered with ``%s`` (i.e. ``str()``).
    """
    # The explicit tuple keeps tuple-valued *arg* from being unpacked by "%".
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("%s %s" % (desc, arg))
27
def getUid(vertex):
    """Return the value that identifies *vertex*, chosen by its 'nodeLabel'.

    * 'user'      -> vertex['userId']
    * 'tag'       -> vertex['name']
    * 'lockNode'  -> vertex.get('uid') (None when the key is absent)
    * no label    -> None
    * anything else -> vertex['uid']

    The label, and then the label together with the result, are passed to
    ``debug``.

    Raises:
        KeyError: if the key required for the vertex's label is missing.
    """
    label = vertex.get('nodeLabel')
    debug(label)

    if label is None:
        # Unlabelled vertices have no identifying value.
        result = None
    elif label == 'lockNode':
        # The only label for which a missing 'uid' is tolerated.
        result = vertex.get('uid')
    else:
        if label == 'user':
            key = 'userId'
        elif label == 'tag':
            key = 'name'
        else:
            key = 'uid'
        result = vertex[key]

    debug(label, result)
    return result
45
def generateFile(inputFile, outputFile):
    """Copy a graph JSON export to *outputFile* without duplicate-uid vertices.

    The input must be a JSON object holding a 'vertices' list and an 'edges'
    list. Vertices are identified through ``getUid``. For every uid that
    occurs more than once, the first vertex is kept and the later ones are
    dropped, together with every edge whose '_outV' or '_inV' equals the
    '_id' of a dropped vertex. Top-level keys are written in reverse
    alphabetical order. Progress is reported through ``log`` and ``debug``.

    NOTE(review): getUid returns None for vertices without a 'nodeLabel', so
    all of them share the uid None and only the first one survives. This is
    unchanged behaviour; confirm that it is intended.

    Args:
        inputFile: path of the JSON file to read.
        outputFile: path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: if 'vertices' or 'edges' is missing, if a vertex has no
            '_id', or if an edge has no '_outV'/'_inV'.
    """
    # Read everything up front so the input file is closed before processing.
    with open(inputFile) as json_file:
        json_data = json.load(json_file)

    json_data_vertices = json_data['vertices']

    # Pass 1: remember the first '_id' seen for each uid and collect the uids
    # that show up again. The map is local so that repeated calls start from
    # a clean slate. A uid whose first vertex has no '_id' is not detected as
    # duplicated, because a missing first id looks like "not seen yet".
    firstIdByUid = {}
    duplicateUids = {}
    for vertex in json_data_vertices:
        uid = getUid(vertex)
        existId = firstIdByUid.get(uid)
        if existId is None:
            firstIdByUid[uid] = vertex.get('_id')
        else:
            duplicateUids[uid] = existId

    log("duplicate ids", duplicateUids)
    log("number of vertices is", len(json_data_vertices))

    # Pass 2: for each duplicated uid keep the first vertex id and schedule
    # every later one for deletion.
    ids = {}
    deleteIndexes = []
    for vertex in json_data_vertices:
        vertexId = vertex["_id"]
        uid = getUid(vertex)
        if duplicateUids.get(uid) is None:
            continue
        debug("uid to id pair", uid if uid is not None else 'None', vertexId)
        seenIds = ids.get(uid)
        if seenIds is None:
            ids[uid] = [vertexId]
        else:
            seenIds.append(vertexId)
            deleteIndexes.append(vertexId)

    log("ids", ids)
    log("deleteIndexes", deleteIndexes)
    log("deleteIndexes size", len(deleteIndexes))

    # Set lookup keeps the two filters linear in the number of vertices/edges.
    # Assumes element ids are hashable JSON scalars (numbers or strings).
    deleteIdSet = set(deleteIndexes)

    filter_vertex = [v for v in json_data_vertices
                     if v.get('_id') not in deleteIdSet]
    json_data['vertices'] = filter_vertex
    log("number of vertexes after filter", len(filter_vertex))

    json_data_edges = json_data['edges']
    log("number of edges", len(json_data_edges))

    # Drop edges that touch a removed vertex on either end.
    filter_edge = [e for e in json_data_edges
                   if e['_outV'] not in deleteIdSet
                   and e['_inV'] not in deleteIdSet]
    json_data['edges'] = filter_edge
    log("number of edges after filter", len(json_data['edges']))

    # Emit the top-level keys in reverse alphabetical order.
    json_data = OrderedDict(sorted(json_data.items(), key=lambda t: t[0], reverse=True))

    with open(outputFile, 'w') as outfile:
        json.dump(json_data, outfile)
    log("output file is", outputFile)
108
def main(argv):
    """Parse the command line and de-duplicate the requested input file.

    Options:
        -h                   print the usage line and exit with status 3.
        -i, --ifile <file>   input JSON file (required).
        -o, --ofile <file>   accepted but ignored, as before; the output
                             always goes to ``<inputfile>.noduplicates``.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 on an unrecognised or malformed option, and with
    status 3 when help is requested or no input file was given.
    """
    usage = "%s -i <inputfile>" % sys.argv[0]
    print("Number of arguments: %d arguments." % len(sys.argv))

    try:
        # "h" is a plain flag. The old spec "h:" demanded a value, so a bare
        # -h was reported as an invalid option.
        opts, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    inputfile = None
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit(3)
        elif opt in ("-i", "--ifile"):
            inputfile = arg

    if inputfile is None:
        print(usage)
        sys.exit(3)

    print('Input file is "%s"' % inputfile)
    generateFile(inputfile, inputfile + '.noduplicates')
131
132
# Run main() only when the file is executed as a script, passing it the
# command-line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
135
136# print x['uid']