""" Modified on Sun Jul 28 2020 by Yunzhi Shi, DS @ AWS MLSL Cleaned up for the tutorial. Original author: avanetten """ import os, time, random import argparse import math import copy from p_tqdm import p_umap import numpy as np import pandas as pd import scipy.spatial import scipy.stats import networkx as nx import shapely.wkt from shapely.geometry import Point, LineString import utm import apls_utils import osmnx_funcs import graphTools ################################################################################ def add_travel_time(G_, speed_key='inferred_speed_mps', length_key='length', travel_time_key='travel_time_s', verbose=False): """ Compute and add travel time estimaes to each graph edge. Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that includes speed. speed_key : str Key in the edge properties dictionary to use for the edge speed. Defaults to ``'inferred_speed_mps'``. length_key : str Key in the edge properties dictionary to use for the edge length. Defaults to ``'length'`` (asumed to be in meters). travel_time_key : str Name to assign travel time in the edge properties dictionary. Defaults to ``'travel_time_s'``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- G_ : networkx graph Updated graph with travel time attached to each edge. """ for i, (u, v, data) in enumerate(G_.edges(data=True)): if speed_key in data: speed = data[speed_key] if type(speed) == list: speed = np.mean(speed) else: print("speed_key not found:", speed_key) return G_ if verbose: print("data[length_key]:", data[length_key]) print("speed:", speed) travel_time_seconds = data[length_key] / speed data[travel_time_key] = travel_time_seconds return G_ ################################################################################ def create_edge_linestrings(G_, remove_redundant=True, verbose=False): """ Ensure all edges have the 'geometry' tag, use shapely linestrings. Notes ----- If identical edges exist, remove extras. Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that may or may not include 'geometry'. remove_redundant : boolean Switch to remove identical edges, if they exist. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- G_ : networkx graph Updated graph with every edge containing the 'geometry' tag. """ # clean out redundant edges with identical geometry edge_seen_set = set([]) geom_seen = [] bad_edges = [] for i, (u, v, data) in enumerate(G_.edges(data=True)): # create linestring if no geometry reported if 'geometry' not in data: sourcex, sourcey = G_.nodes[u]['x'], G_.nodes[u]['y'] targetx, targety = G_.nodes[v]['x'], G_.nodes[v]['y'] line_geom = LineString([Point(sourcex, sourcey), Point(targetx, targety)]) data['geometry'] = line_geom # get reversed line coords = list(data['geometry'].coords)[::-1] line_geom_rev = LineString(coords) else: # check which direction linestring is travelling (it may be going # from v -> u, which means we need to reverse the linestring) # otherwise new edge is tangled line_geom = data['geometry'] u_loc = [G_.nodes[u]['x'], G_.nodes[u]['y']] v_loc = [G_.nodes[v]['x'], G_.nodes[v]['y']] geom_p0 = list(line_geom.coords)[0] dist_to_u = scipy.spatial.distance.euclidean(u_loc, geom_p0) dist_to_v = scipy.spatial.distance.euclidean(v_loc, geom_p0) coords = list(data['geometry'].coords)[::-1] line_geom_rev = LineString(coords) if dist_to_u > dist_to_v: data['geometry'] = line_geom_rev # flag redundant edges if remove_redundant: if i == 0: edge_seen_set = set([(u, v)]) edge_seen_set.add((v, u)) geom_seen.append(line_geom) else: if ((u, v) in edge_seen_set) or ((v, u) in edge_seen_set): # test if geoms have already been seen for geom_seen_tmp in geom_seen: if (line_geom == geom_seen_tmp) \ or (line_geom_rev == geom_seen_tmp): bad_edges.append((u, v)) if verbose: print("\nRedundant edge:", u, v) else: edge_seen_set.add((u, v)) geom_seen.append(line_geom) geom_seen.append(line_geom_rev) if remove_redundant: if verbose: print("\nedge_seen_set:", edge_seen_set) print("redundant edges:", bad_edges) for (u, v) in bad_edges: if G_.has_edge(u, v): G_.remove_edge(u, v) # , key) return G_ ################################################################################ def cut_linestring(line, distance, verbose=False): """ Cuts a shapely linestring at a specified distance from its starting point. Notes ---- Return orignal linestring if distance <= 0 or greater than the length of the line. Reference: http://toblerity.org/shapely/manual.html#linear-referencing-methods Arguments --------- line : shapely linestring Input shapely linestring to cut. distanct : float Distance from start of line to cut it in two. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- [line1, line2] : list Cut linestrings. If distance <= 0 or greater than the length of the line, return input line. """ if verbose: print("Cutting linestring at distance", distance, "...") if distance <= 0.0 or distance >= line.length: return [LineString(line)] # iterate through coorda and check if interpolated point has been passed # already or not coords = list(line.coords) for i, p in enumerate(coords): pdl = line.project(Point(p)) if verbose: print(i, p, "line.project point:", pdl) if pdl == distance: return [ LineString(coords[:i+1]), LineString(coords[i:])] if pdl > distance: cp = line.interpolate(distance) return [ LineString(coords[:i] + [(cp.x, cp.y)]), LineString([(cp.x, cp.y)] + coords[i:])] # if we've reached here then that means we've encountered a self-loop and # the interpolated point is between the final midpoint and the the original # node i = len(coords) - 1 cp = line.interpolate(distance) return [ LineString(coords[:i] + [(cp.x, cp.y)]), LineString([(cp.x, cp.y)] + coords[i:])] ################################################################################ def get_closest_edge_from_G(G_, point, nearby_nodes_set=set([]), verbose=False): """ Return closest edge to point, and distance to said edge. Notes ----- Just discovered a similar function: https://github.com/gboeing/osmnx/blob/master/osmnx/utils.py#L501 Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that includes the 'geometry' key. point : shapely Point Shapely point containing (x, y) coordinates. nearby_nodes_set : set Set of possible edge endpoints to search. If nearby_nodes_set is not empty, only edges with a node in this set will be checked (this can greatly speed compuation on large graphs). If nearby_nodes_set is empty, check all possible edges in the graph. Defaults to ``set([])``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- best_edge, min_dist, best_geom : tuple best_edge is the closest edge to the point min_dist is the distance to that edge best_geom is the geometry of the ege """ # get distances from point to lines dist_list = [] edge_list = [] geom_list = [] p = point # Point(point_coords) for i, (u, v, key, data) in enumerate(G_.edges(keys=True, data=True)): # skip if u,v not in nearby nodes if len(nearby_nodes_set) > 0: if (u not in nearby_nodes_set) and (v not in nearby_nodes_set): continue if verbose: print(("u,v,key,data:", u, v, key, data)) print((" type data['geometry']:", type(data['geometry']))) try: line = data['geometry'] except KeyError: line = data['attr_dict']['geometry'] geom_list.append(line) dist_list.append(p.distance(line)) edge_list.append([u, v, key]) # get closest edge min_idx = np.argmin(dist_list) min_dist = dist_list[min_idx] best_edge = edge_list[min_idx] best_geom = geom_list[min_idx] return best_edge, min_dist, best_geom ################################################################################ def insert_point_into_G(G_, point, node_id=100000, max_distance_meters=5, nearby_nodes_set=set([]), allow_renaming=True, verbose=False, super_verbose=False): """ Insert a new node in the graph closest to the given point. Notes ----- If the point is too far from the graph, don't insert a node. Assume all edges have a linestring geometry http://toblerity.org/shapely/manual.html#object.simplify Sometimes the point to insert will have the same coordinates as an existing point. If allow_renaming == True, relabel the existing node. convert linestring to multipoint? https://github.com/Toblerity/Shapely/issues/190 TODO : Implement a version without renaming that tracks which node is closest to the desired point. Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that includes the 'geometry' key. point : shapely Point Shapely point containing (x, y) coordinates node_id : int Unique identifier of node to insert. Defaults to ``100000``. max_distance_meters : float Maximum distance in meters between point and graph. Defaults to ``5``. nearby_nodes_set : set Set of possible edge endpoints to search. If nearby_nodes_set is not empty, only edges with a node in this set will be checked (this can greatly speed compuation on large graphs). If nearby_nodes_set is empty, check all possible edges in the graph. Defaults to ``set([])``. allow_renameing : boolean Switch to allow renaming of an existing node with node_id if the existing node is closest to the point. Defaults to ``False``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Returns ------- G_, node_props, min_dist : tuple G_ is the updated graph node_props gives the properties of the inserted node min_dist is the distance from the point to the graph """ best_edge, min_dist, best_geom = get_closest_edge_from_G( G_, point, nearby_nodes_set=nearby_nodes_set, verbose=super_verbose) [u, v, key] = best_edge G_node_set = set(G_.nodes()) if verbose: print("Inserting point:", node_id) print("best edge:", best_edge) print(" best edge dist:", min_dist) u_loc = [G_.nodes[u]['x'], G_.nodes[u]['y']] v_loc = [G_.nodes[v]['x'], G_.nodes[v]['y']] print("ploc:", (point.x, point.y)) print("uloc:", u_loc) print("vloc:", v_loc) if min_dist > max_distance_meters: if verbose: print("min_dist > max_distance_meters, skipping...") return G_, {}, -1, -1 else: # updated graph # skip if node exists already if node_id in G_node_set: if verbose: print("Node ID:", node_id, "already exists, skipping...") return G_, {}, -1, -1 line_geom = best_geom # Length along line that is closest to the point line_proj = line_geom.project(point) # Now combine with interpolated point on line new_point = line_geom.interpolate(line_geom.project(point)) x, y = new_point.x, new_point.y ################# # create new node try: # first get zone, then convert to latlon _, _, zone_num, zone_letter = utm.from_latlon(G_.nodes[u]['lat'], G_.nodes[u]['lon']) # convert utm to latlon lat, lon = utm.to_latlon(x, y, zone_num, zone_letter) except: lat, lon = y, x # set properties node_props = {'highway': 'insertQ', 'lat': lat, 'lon': lon, 'osmid': node_id, 'x': x, 'y': y} # add node G_.add_node(node_id, **node_props) # assign, then update edge props for new edge _, _, edge_props_new = copy.deepcopy( list(G_.edges([u, v], data=True))[0]) # cut line split_line = cut_linestring(line_geom, line_proj) if split_line is None: print("Failure in cut_linestring()...") print("type(split_line):", type(split_line)) print("split_line:", split_line) print("line_geom:", line_geom) print("line_geom.length:", line_geom.length) print("line_proj:", line_proj) print("min_dist:", min_dist) return G_, {}, 0, 0 if verbose: print("split_line:", split_line) if len(split_line) == 1: if verbose: print("split line empty, min_dist:", min_dist) # get coincident node outnode = '' outnode_x, outnode_y = -1, -1 x_p, y_p = new_point.x, new_point.y x_u, y_u = G_.nodes[u]['x'], G_.nodes[u]['y'] x_v, y_v = G_.nodes[v]['x'], G_.nodes[v]['y'] # sometimes it seems that the nodes aren't perfectly coincident, # so see if it's within a buffer buff = 0.05 # meters if (abs(x_p - x_u) <= buff) and (abs(y_p - y_u) <= buff): outnode = u outnode_x, outnode_y = x_u, y_u elif (abs(x_p - x_v) <= buff) and (abs(y_p - y_v) <= buff): outnode = v outnode_x, outnode_y = x_v, y_v else: print("Error in determining node coincident with node: " + str(node_id) + " along edge: " + str(best_edge)) print("x_p, y_p:", x_p, y_p) print("x_u, y_u:", x_u, y_u) print("x_v, y_v:", x_v, y_v) # return return G_, {}, 0, 0 # if the line cannot be split, that means that the new node # is coincident with an existing node. Relabel, if desired if allow_renaming: node_props = G_.nodes[outnode] # A dictionary with the old labels as keys and new labels # as values. A partial mapping is allowed. mapping = {outnode: node_id} Gout = nx.relabel_nodes(G_, mapping) if verbose: print("Swapping out node ids:", mapping) return Gout, node_props, x_p, y_p else: # new node is already added, presumably at the exact location # of an existing node. So just remove the best edge and make # an edge from new node to existing node, length should be 0.0 line1 = LineString([new_point, Point(outnode_x, outnode_y)]) edge_props_line1 = edge_props_new.copy() edge_props_line1['length'] = line1.length edge_props_line1['geometry'] = line1 # make sure length is zero if line1.length > buff: print("Nodes should be coincident and length 0!") print(" line1.length:", line1.length) print(" x_u, y_u :", x_u, y_u) print(" x_v, y_v :", x_v, y_v) print(" x_p, y_p :", x_p, y_p) print(" new_point:", new_point) print(" Point(outnode_x, outnode_y):", Point(outnode_x, outnode_y)) return # add edge of length 0 from new node to neareest existing node G_.add_edge(node_id, outnode, **edge_props_line1) return G_, node_props, x, y else: # else, create new edges line1, line2 = split_line # get distances u_loc = [G_.nodes[u]['x'], G_.nodes[u]['y']] v_loc = [G_.nodes[v]['x'], G_.nodes[v]['y']] # compare to first point in linestring geom_p0 = list(line_geom.coords)[0] dist_to_u = scipy.spatial.distance.euclidean(u_loc, geom_p0) dist_to_v = scipy.spatial.distance.euclidean(v_loc, geom_p0) # reverse edge order if v closer than u if dist_to_v < dist_to_u: line2, line1 = split_line if verbose: print("Creating two edges from split...") print(" original_length:", line_geom.length) print(" line1_length:", line1.length) print(" line2_length:", line2.length) print(" u, dist_u_to_point:", u, dist_to_u) print(" v, dist_v_to_point:", v, dist_to_v) print(" min_dist:", min_dist) # add new edges edge_props_line1 = edge_props_new.copy() edge_props_line1['length'] = line1.length edge_props_line1['geometry'] = line1 # line2 edge_props_line2 = edge_props_new.copy() edge_props_line2['length'] = line2.length edge_props_line2['geometry'] = line2 # check which direction linestring is travelling (it may be going # from v -> u, which means we need to reverse the linestring) # otherwise new edge is tangled geom_p0 = list(line_geom.coords)[0] dist_to_u = scipy.spatial.distance.euclidean(u_loc, geom_p0) dist_to_v = scipy.spatial.distance.euclidean(v_loc, geom_p0) if dist_to_u < dist_to_v: G_.add_edge(u, node_id, **edge_props_line1) G_.add_edge(node_id, v, **edge_props_line2) else: G_.add_edge(node_id, u, **edge_props_line1) G_.add_edge(v, node_id, **edge_props_line2) if verbose: print("insert edges:", u, '-', node_id, 'and', node_id, '-', v) # remove initial edge G_.remove_edge(u, v, key) return G_, node_props, x, y ################################################################################ def insert_control_points(G_, control_points, max_distance_meters=10, allow_renaming=True, n_nodes_for_kd=1000, n_neighbors=20, x_coord='x', y_coord='y', verbose=True): """ Wrapper around insert_point_into_G() for all control_points. Notes ----- control_points are assumed to be of the format: [[node_id, x, y], ... ] TODO : Implement a version without renaming that tracks which node is closest to the desired point. Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that includes the 'geometry' key. control_points : array Points to insert in the graph, assumed to the of the format: [[node_id, x, y], ... ] max_distance_meters : float Maximum distance in meters between point and graph. Defaults to ``5``. allow_renameing : boolean Switch to allow renaming of an existing node with node_id if the existing node is closest to the point. Defaults to ``False``. n_nodes_for_kd : int Minumu size of graph to render to kdtree to speed node placement. Defaults to ``1000``. n_neighbors : int Number of neigbors to return if building a kdtree. Defaults to ``20``. x_coord : str Name of x_coordinate, can be 'x' or 'lon'. Defaults to ``'x'``. y_coord : str Name of y_coordinate, can be 'y' or 'lat'. Defaults to ``'y'``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- Gout, new_xs, new_ys : tuple Gout is the updated graph new_xs, new_ys are coordinates of the inserted points """ t0 = time.time() # insertion can be super slow so construct kdtree if a large graph if len(G_.nodes()) > n_nodes_for_kd: # construct kdtree of ground truth kd_idx_dic, kdtree, pos_arr = apls_utils.G_to_kdtree(G_) Gout = G_.copy() new_xs, new_ys = [], [] if len(G_.nodes()) == 0: return Gout, new_xs, new_ys for i, [node_id, x, y] in enumerate(control_points): if math.isinf(x) or math.isinf(y): print("Infinity in coords!:", x, y) return if verbose: # if (i % 20) == 0: print(i, "/", len(control_points), "Insert control point:", node_id, "x =", x, "y =", y) point = Point(x, y) # if large graph, determine nearby nodes if len(G_.nodes()) > n_nodes_for_kd: # get closest nodes node_names, dists_m_refine = apls_utils.nodes_near_point( x, y, kdtree, kd_idx_dic, x_coord=x_coord, y_coord=y_coord, n_neighbors=n_neighbors, verbose=False) nearby_nodes_set = set(node_names) else: nearby_nodes_set = set([]) # insert point Gout, node_props, xnew, ynew = insert_point_into_G( Gout, point, node_id=node_id, max_distance_meters=max_distance_meters, nearby_nodes_set=nearby_nodes_set, allow_renaming=allow_renaming, verbose=verbose) if (x != 0) and (y != 0): new_xs.append(xnew) new_ys.append(ynew) t1 = time.time() if verbose: print("Time to run insert_control_points():", t1-t0, "seconds") return Gout, new_xs, new_ys ################################################################################ def create_graph_midpoints(G_, linestring_delta=50, is_curved_eps=0.03, n_id_add_val=1, allow_renaming=True, verbose=False, super_verbose=False): """ Insert midpoint nodes into long edges on the graph. Arguments --------- G_ : networkx graph Input networkx graph, with edges assumed to have a dictioary of properties that includes the 'geometry' key. linestring_delta : float Distance in meters between linestring midpoints. Defaults to ``50``. is_curved_eps : float Minumum curvature for injecting nodes (if curvature is less than this value, no midpoints will be injected). If < 0, always inject points on line, regardless of curvature. Defaults to ``0.3``. n_id_add_val : int Sets min midpoint id above existing nodes e.g.: G.nodes() = [1,2,4], if n_id_add_val = 5, midpoints will be [9,10,11,...] allow_renameing : boolean Switch to allow renaming of an existing node with node_id if the existing node is closest to the point. Defaults to ``False``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Returns ------- Gout, xms, yms : tuple Gout is the updated graph xms, yms are coordinates of the inserted points """ if len(G_.nodes()) == 0: return G_, [], [] # midpoints xms, yms = [], [] Gout = G_.copy() midpoint_name_val, midpoint_name_inc = np.max(G_.nodes())+n_id_add_val, 1 for u, v, data in G_.edges(data=True): # curved line if 'geometry' in data: # first edge props and get utm zone and letter edge_props_init = G_.edges([u, v]) linelen = data['length'] line = data['geometry'] ################# # ignore empty line if linelen == 0: continue # check if curved or not minx, miny, maxx, maxy = line.bounds # get euclidean distance dst = scipy.spatial.distance.euclidean([minx, miny], [maxx, maxy]) # ignore if almost straight if np.abs(dst - linelen) / linelen < is_curved_eps: continue ################# ################# # also ignore super short lines if linelen < 0.75*linestring_delta: continue ################# if verbose: print("create_graph_midpoints()...") print(" u,v:", u, v) print(" data:", data) print(" edge_props_init:", edge_props_init) # interpolate midpoints # if edge is short, use midpoint, else get evenly spaced points if linelen <= linestring_delta: interp_dists = [0.5 * line.length] else: # get evenly spaced points npoints = len(np.arange(0, linelen, linestring_delta)) + 1 interp_dists = np.linspace(0, linelen, npoints)[1:-1] if verbose: print(" interp_dists:", interp_dists) # create nodes node_id_new_list = [] xms_tmp, yms_tmp = [], [] for j, d in enumerate(interp_dists): if verbose: print(" ", j, "interp_dist:", d) midPoint = line.interpolate(d) xm0, ym0 = midPoint.xy xm = xm0[-1] ym = ym0[-1] point = Point(xm, ym) xms.append(xm) yms.append(ym) xms_tmp.append(xm) yms_tmp.append(ym) if verbose: print(" midpoint:", xm, ym) # add node to graph, with properties of u node_id = midpoint_name_val midpoint_name_val += midpoint_name_inc node_id_new_list.append(node_id) if verbose: print(" node_id:", node_id) # add to graph Gout, node_props, _, _ = insert_point_into_G( Gout, point, node_id=node_id, allow_renaming=allow_renaming, verbose=super_verbose) return Gout, xms, yms ################################################################################ def _clean_sub_graphs(G_, min_length=80, max_nodes_to_skip=100, weight='length', verbose=True, super_verbose=False): """ Remove subgraphs with a max path length less than min_length, if the subgraph has more than max_noxes_to_skip, don't check length (this step great reduces processing time) """ if len(G_.nodes()) == 0: return G_ if verbose: print("Running clean_sub_graphs...") try: sub_graphs = list(nx.connected_component_subgraphs(G_)) except: sub_graph_nodes = nx.connected_components(G_) sub_graphs = [G_.subgraph(c).copy() for c in sub_graph_nodes] bad_nodes = [] if verbose: print(" len(G_.nodes()):", len(G_.nodes())) print(" len(G_.edges()):", len(G_.edges())) if super_verbose: print("G_.nodes:", G_.nodes()) edge_tmp = G_.edges()[np.random.randint(len(G_.edges()))] print(edge_tmp, "G.edge props:", G_.edges[edge_tmp[0]][edge_tmp[1]]) for G_sub in sub_graphs: # don't check length if too many nodes in subgraph if len(G_sub.nodes()) > max_nodes_to_skip: continue else: all_lengths = dict( nx.all_pairs_dijkstra_path_length(G_sub, weight=weight)) if super_verbose: print(" \nGs.nodes:", G_sub.nodes()) print(" all_lengths:", all_lengths) # get all lenghts lens = [] for u in all_lengths.keys(): v = all_lengths[u] for uprime in v.keys(): vprime = v[uprime] lens.append(vprime) if super_verbose: print(" u, v", u, v) print(" uprime, vprime:", uprime, vprime) max_len = np.max(lens) if super_verbose: print(" Max length of path:", max_len) if max_len < min_length: bad_nodes.extend(G_sub.nodes()) if super_verbose: print(" appending to bad_nodes:", G_sub.nodes()) # remove bad_nodes G_.remove_nodes_from(bad_nodes) if verbose: print(" num bad_nodes:", len(bad_nodes)) # print ("bad_nodes:", bad_nodes) print(" len(G'.nodes()):", len(G_.nodes())) print(" len(G'.edges()):", len(G_.edges())) if super_verbose: print(" G_.nodes:", G_.nodes()) return G_ ################################################################################ def _create_gt_graph(geoJson, im_test_file, network_type='all_private', valid_road_types=set([]), osmidx=0, osmNodeidx=0, subgraph_filter_weight='length', min_subgraph_length=5, travel_time_key='travel_time_s', speed_key='inferred_speed_mps', verbose=False, super_verbose=False): '''Ingest graph from geojson file and refine''' t0 = time.time() if verbose: print("Executing graphTools.create_graphGeoJson()...") G0gt_init = graphTools.create_graphGeoJson( geoJson, name='unnamed', retain_all=True, network_type=network_type, valid_road_types=valid_road_types, osmidx=osmidx, osmNodeidx=osmNodeidx, verbose=verbose) t1 = time.time() if verbose: print("Time to run create_graphGeoJson():", t1 - t0, "seconds") # refine graph G_gt = _refine_gt_graph(G0gt_init, im_test_file, subgraph_filter_weight=subgraph_filter_weight, min_subgraph_length=min_subgraph_length, travel_time_key=travel_time_key, speed_key=speed_key, verbose=verbose, super_verbose=super_verbose) return G_gt, G0gt_init ################################################################################ def _refine_gt_graph(G0gt_init, im_test_file, subgraph_filter_weight='length', min_subgraph_length=5, travel_time_key='travel_time_s', speed_key='inferred_speed_mps', verbose=False, super_verbose=False): """refine ground truth graph""" t1 = time.time() # save latlon geometry (osmnx overwrites the 'geometry' tag) # also compute pixel geom for i, (u, v, key, data) in enumerate(G0gt_init.edges(keys=True, data=True)): if 'geometry' not in data: sourcex, sourcey = G0gt_init.nodes[u]['x'], G0gt_init.nodes[u]['y'] targetx, targety = G0gt_init.nodes[v]['x'], G0gt_init.nodes[v]['y'] line_geom = LineString([Point(sourcex, sourcey), Point(targetx, targety)]) else: line_geom = data['geometry'] data['geometry_latlon'] = line_geom.wkt if os.path.exists(im_test_file): # get pixel geom (do this after simplify so that we don't have to # collapse the lines (see apls_wkt_to_G.wkt_to_G) geom_pix = apls_utils.geomGeo2geomPixel(line_geom, input_raster=im_test_file) data['geometry_pix'] = geom_pix.wkt data['length_pix'] = geom_pix.length if len(G0gt_init.nodes()) == 0: return G0gt_init G0gt = osmnx_funcs.project_graph(G0gt_init) if verbose: print("len G0gt.nodes():", len(G0gt.nodes())) print("len G0gt.edges:", len(G0gt.edges())) if verbose: print("Simplifying graph...") try: G2gt_init0 = osmnx_funcs.simplify_graph(G0gt).to_undirected() except: print("Cannot simplify graph, using original") G2gt_init0 = G0gt # make sure all edges have a geometry assigned to them G2gt_init1 = create_edge_linestrings( G2gt_init0.copy(), remove_redundant=True) t2 = time.time() if verbose: print("Time to project, simplify, and create linestrings:", t2 - t1, "seconds") # clean up connected components G2gt_init2 = _clean_sub_graphs( G2gt_init1.copy(), min_length=min_subgraph_length, weight=subgraph_filter_weight, verbose=verbose, super_verbose=super_verbose) # add pixel coords try: if os.path.exists(im_test_file): G_gt_almost, _, gt_graph_coords = apls_utils._set_pix_coords( G2gt_init2.copy(), im_test_file) else: G_gt_almost = G2gt_init2 except: pass # !!!!!!!!!!!!!!! # ensure nodes have coorect xpix and ypix since _set_pix_coords is faulty! for j, n in enumerate(G_gt_almost.nodes()): x, y = G_gt_almost.nodes[n]['x'], G_gt_almost.nodes[n]['y'] geom_pix = apls_utils.geomGeo2geomPixel(Point(x, y), input_raster=im_test_file) [(xp, yp)] = list(geom_pix.coords) G_gt_almost.nodes[n]['x_pix'] = xp G_gt_almost.nodes[n]['y_pix'] = yp # update pixel and lat lon geometries that get turned into lists upon # simplify() that produces a 'geometry' tag in wmp if verbose: print("Merge 'geometry' linestrings...") keys_tmp = ['geometry_pix', 'geometry_latlon'] for i, (u, v, attr_dict) in enumerate(G_gt_almost.edges(data=True)): for key_tmp in keys_tmp: if key_tmp not in attr_dict.keys(): continue if super_verbose: print("Merge", key_tmp, "...") geom = attr_dict[key_tmp] if type(geom) == list: # check if the list items are wkt strings, if so, create # linestrigs # or (type(geom_pix[0]) == unicode): if (type(geom[0]) == str): geom = [shapely.wkt.loads(ztmp) for ztmp in geom] # merge geoms attr_dict[key_tmp] = shapely.ops.linemerge(geom) elif type(geom) == str: attr_dict[key_tmp] = shapely.wkt.loads(geom) else: pass # update wkt_pix? if 'wkt_pix' in attr_dict.keys(): attr_dict['wkt_pix'] = attr_dict['geometry_pix'].wkt # update 'length_pix' if 'length_pix' in attr_dict.keys(): attr_dict['length_pix'] = np.sum([attr_dict['length_pix']]) # check if simplify created various speeds on an edge speed_keys = [speed_key, 'inferred_speed_mph', 'inferred_speed_mps'] for sk in speed_keys: if sk not in attr_dict.keys(): continue if type(attr_dict[sk]) == list: if verbose: print(" Taking mean of multiple speeds on edge:", u, v) attr_dict[sk] = np.mean(attr_dict[sk]) if verbose: print("u, v, speed_key, attr_dict)[speed_key]:", u, v, sk, attr_dict[sk]) # add travel time G_gt = add_travel_time(G_gt_almost.copy(), speed_key=speed_key, travel_time_key=travel_time_key) return G_gt ################################################################################ def make_graphs(G_gt_, G_p_, weight='length', speed_key='inferred_speed_mps', travel_time_key='travel_time_s', max_nodes_for_midpoints=500, linestring_delta=50, is_curved_eps=0.012, max_snap_dist=4, allow_renaming=True, verbose=False, super_verbose=False): """ Match nodes in ground truth and propsal graphs, and get paths. Notes ----- The path length dictionaries returned by this function will be fed into compute_metric(). Arguments --------- G_gt_ : networkx graph Ground truth graph. G_p_ : networkd graph Proposal graph over the same region. weight : str Key in the edge properties dictionary to use for the path length weight. Defaults to ``'length'``. speed_key : str Key in the edge properties dictionary to use for the edge speed. Defaults to ``'inferred_speed_mps'``. travel_time_key : str Name to assign travel time in the edge properties dictionary. Defaults to ``'travel_time_s'``. max_nodes_for_midpoints : int Maximum number of gt nodes to inject midpoints. If there are more gt nodes than this, skip midpoints and use this number of points to comput APLS. linestring_delta : float Distance in meters between linestring midpoints. If len gt nodes > max_nodes_for_midppoints this argument is ignored. Defaults to ``50``. is_curved_eps : float Minumum curvature for injecting nodes (if curvature is less than this value, no midpoints will be injected). If < 0, always inject points on line, regardless of curvature. If len gt nodes > max_nodes_for_midppoints this argument is ignored. Defaults to ``0.012``. max_snap_dist : float Maximum distance a node can be snapped onto a graph. Defaults to ``4``. allow_renameing : boolean Switch to allow renaming of an existing node with node_id if the existing node is closest to the point. Defaults to ``False``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Return ------ G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime : tuple G_gt_cp is ground truth with control points inserted G_p_cp is proposal with control points inserted G_gt_cp_prime is ground truth with control points from prop inserted G_p_cp_prime is proposal with control points from gt inserted all_pairs_lengths_gt_native is path length dict corresponding to G_gt_cp all_pairs_lengths_prop_native is path length dict corresponding to G_p_cp all_pairs_lengths_gt_prime is path length dict corresponding to G_gt_cp_prime all_pairs_lenfgths_prop_prime is path length dict corresponding to G_p_cp_prime """ t0 = time.time() for i, (u, v, data) in enumerate(G_gt_.edges(keys=False, data=True)): if weight not in data.keys(): print("Error!", weight, "not in G_gt_ edge u, v, data:", u, v, data) return for i, (u, v, key, data) in enumerate(G_gt_.edges(keys=True, data=True)): try: line = data['geometry'] except KeyError: line = data[0]['geometry'] if type(line) == str: # or type(line) == unicode: data['geometry'] = shapely.wkt.loads(line) # create graph with midpoints G_gt0 = create_edge_linestrings(G_gt_.to_undirected()) if verbose: print("len G_gt.nodes():", len(list(G_gt0.nodes()))) print("len G_gt.edges():", len(list(G_gt0.edges()))) if verbose: print("Creating gt midpoints") G_gt_cp0, xms, yms = create_graph_midpoints( G_gt0.copy(), linestring_delta=linestring_delta, is_curved_eps=is_curved_eps, verbose=False) # add travel time G_gt_cp = add_travel_time(G_gt_cp0.copy(), speed_key=speed_key, travel_time_key=travel_time_key) # get ground truth control points control_points_gt = [] for n in G_gt_cp.nodes(): u_x, u_y = G_gt_cp.nodes[n]['x'], G_gt_cp.nodes[n]['y'] control_points_gt.append([n, u_x, u_y]) if verbose: print("len control_points_gt:", len(control_points_gt)) # get ground truth paths if verbose: print("Get ground truth paths...") all_pairs_lengths_gt_native = dict( nx.shortest_path_length(G_gt_cp, weight=weight)) ############### # Proposal for i, (u, v, data) in enumerate(G_p_.edges(keys=False, data=True)): if weight not in data.keys(): print("Error!", weight, "not in G_p_ edge u, v, data:", u, v, data) return # get proposal graph with native midpoints for i, (u, v, key, data) in enumerate(G_p_.edges(keys=True, data=True)): try: line = data['geometry'] except: line = data[0]['geometry'] if type(line) == str: # or type(line) == unicode: data['geometry'] = shapely.wkt.loads(line) G_p0 = create_edge_linestrings(G_p_.to_undirected()) # add travel time G_p = add_travel_time(G_p0.copy(), speed_key=speed_key, travel_time_key=travel_time_key) if verbose: print("len G_p.nodes():", len(G_p.nodes())) print("len G_p.edges():", len(G_p.edges())) if verbose: print("Creating proposal midpoints") G_p_cp0, xms_p, yms_p = create_graph_midpoints( G_p.copy(), linestring_delta=linestring_delta, is_curved_eps=is_curved_eps, verbose=False) # add travel time G_p_cp = add_travel_time(G_p_cp0.copy(), speed_key=speed_key, travel_time_key=travel_time_key) if verbose: print("len G_p_cp.nodes():", len(G_p_cp.nodes())) print("len G_p_cp.edges():", len(G_p_cp.edges())) # set proposal control nodes, originally just all nodes in G_p_cp # original method sets proposal control points as all nodes in G_p_cp # get proposal control points control_points_prop = [] for n in G_p_cp.nodes(): u_x, u_y = G_p_cp.nodes[n]['x'], G_p_cp.nodes[n]['y'] control_points_prop.append([n, u_x, u_y]) # get paths all_pairs_lengths_prop_native = dict( nx.shortest_path_length(G_p_cp, weight=weight)) ############### # insert gt control points into proposal if verbose: print("Inserting", len(control_points_gt), "control points into G_p...") print("G_p.nodes():", G_p.nodes()) G_p_cp_prime0, xn_p, yn_p = insert_control_points( G_p.copy(), control_points_gt, max_distance_meters=max_snap_dist, allow_renaming=allow_renaming, verbose=super_verbose) # add travel time G_p_cp_prime = add_travel_time(G_p_cp_prime0.copy(), speed_key=speed_key, travel_time_key=travel_time_key) ############### # now insert control points into ground truth if verbose: print("\nInserting", len(control_points_prop), "control points into G_gt...") # permit renaming of inserted nodes if coincident with existing node G_gt_cp_prime0, xn_gt, yn_gt = insert_control_points( G_gt_, control_points_prop, max_distance_meters=max_snap_dist, allow_renaming=allow_renaming, verbose=super_verbose) # add travel time G_gt_cp_prime = add_travel_time(G_gt_cp_prime0.copy(), speed_key=speed_key, travel_time_key=travel_time_key) ############### # get paths all_pairs_lengths_gt_prime = dict( nx.shortest_path_length(G_gt_cp_prime, weight=weight)) all_pairs_lengths_prop_prime = dict( nx.shortest_path_length(G_p_cp_prime, weight=weight)) tf = time.time() if verbose: print("Time to run make_graphs in apls.py:", tf - t0, "seconds") return G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime ################################################################################ def make_graphs_yuge(G_gt, G_p, weight='length', speed_key='inferred_speed_mps', travel_time_key='travel_time_s', max_nodes=500, max_snap_dist=4, allow_renaming=True, verbose=True, super_verbose=False): """ Match nodes in large ground truth and propsal graphs, and get paths. Notes ----- Skip midpoint injection and only select a subset of routes to compare. The path length dictionaries returned by this function will be fed into compute_metric(). Arguments --------- G_gt : networkx graph Ground truth graph. G_p : networkd graph Proposal graph over the same region. weight : str Key in the edge properties dictionary to use for the path length weight. Defaults to ``'length'``. speed_key : str Key in the edge properties dictionary to use for the edge speed. Defaults to ``'inferred_speed_mps'``. travel_time_key : str Name to assign travel time in the edge properties dictionary. Defaults to ``'travel_time_s'``. max_nodess : int Maximum number of gt nodes to inject midpoints. If there are more gt nodes than this, skip midpoints and use this number of points to comput APLS. max_snap_dist : float Maximum distance a node can be snapped onto a graph. Defaults to ``4``. allow_renameing : boolean Switch to allow renaming of an existing node with node_id if the existing node is closest to the point. Defaults to ``False``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Return ------ G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime : tuple G_gt_cp is ground truth with control points inserted G_p_cp is proposal with control points inserted G_gt_cp_prime is ground truth with control points from prop inserted G_p_cp_prime is proposal with control points from gt inserted all_pairs_lengths_gt_native is path length dict corresponding to G_gt_cp all_pairs_lengths_prop_native is path length dict corresponding to G_p_cp all_pairs_lengths_gt_prime is path length dict corresponding to G_gt_cp_prime all_pairs_lenfgths_prop_prime is path length dict corresponding to G_p_cp_prime """ t0 = time.time() for i, (u, v, key, data) in enumerate(G_gt.edges(keys=True, data=True)): try: line = data['geometry'] except: line = data[0]['geometry'] if type(line) == str: # or type(line) == unicode: data['geometry'] = shapely.wkt.loads(line) for i, (u, v, key, data) in enumerate(G_p.edges(keys=True, data=True)): try: line = data['geometry'] except: line = data[0]['geometry'] if type(line) == str: # or type(line) == unicode: data['geometry'] = shapely.wkt.loads(line) # create graph with linestrings? G_gt_cp = G_gt.to_undirected() if verbose: print("len(G_gt.nodes()):", len(G_gt_cp.nodes())) print("len(G_gt.edges()):", len(G_gt_cp.edges())) # gt node and edge props node = random.choice(list(G_gt.nodes())) print("node:", node, "G_gt random node props:", G_gt.nodes[node]) edge_tmp = random.choice(list(G_gt.edges())) print("G_gt edge_tmp:", edge_tmp) try: print("edge:", edge_tmp, "G_gt random edge props:", G_gt.edges[edge_tmp[0]][edge_tmp[1]]) except: try: print("edge:", edge_tmp, "G_gt random edge props:", G_gt.edges[edge_tmp[0], edge_tmp[1], 0]) except: pass # prop node and edge props node = random.choice(list(G_p.nodes())) print("node:", node, "G_p random node props:", G_p.nodes[node]) edge_tmp = random.choice(list(G_p.edges())) print("G_p edge_tmp:", edge_tmp) try: print("edge:", edge_tmp, "G_p random edge props:", G_p.edges[edge_tmp[0]][edge_tmp[1]]) except: try: print("edge:", edge_tmp, "G_p random edge props:", G_p.edges[edge_tmp[0], edge_tmp[1], 0]) except: pass # get ground truth control points, which will be a subset of nodes sample_size = min(max_nodes, len(G_gt_cp.nodes())) rand_nodes_gt = random.sample(G_gt_cp.nodes(), sample_size) rand_nodes_gt_set = set(rand_nodes_gt) control_points_gt = [] for itmp,n in enumerate(rand_nodes_gt): if verbose and (i % 20) == 0: print ("control_point", itmp, ":", n, ":", G_gt_cp.nodes[n]) u_x, u_y = G_gt_cp.nodes[n]['x'], G_gt_cp.nodes[n]['y'] control_points_gt.append([n, u_x, u_y]) if verbose: print("len control_points_gt:", len(control_points_gt)) # add travel time G_gt_cp = add_travel_time(G_gt_cp, speed_key=speed_key, travel_time_key=travel_time_key) # get route lengths between all control points # gather all paths from nodes of interest, keep only routes to control nodes tt = time.time() if verbose: print("Computing all_pairs_lengths_gt_native...") all_pairs_lengths_gt_native = {} for itmp, source in enumerate(rand_nodes_gt): if verbose and ((itmp % 50) == 0): print((itmp, "source:", source)) paths_tmp = nx.single_source_dijkstra_path_length( G_gt_cp, source, weight=weight) # delete items for k in list(paths_tmp.keys()): if k not in rand_nodes_gt_set: del paths_tmp[k] all_pairs_lengths_gt_native[source] = paths_tmp if verbose: print(("Time to compute all source routes for", sample_size, "nodes:", time.time() - tt, "seconds")) ############### # get proposal graph with native midpoints G_p_cp = G_p.to_undirected() if verbose: print("len G_p_cp.nodes():", len(G_p_cp.nodes())) print("G_p_cp.edges():", len(G_p_cp.edges())) # get control points, which will be a subset of nodes # (original method sets proposal control points as all nodes in G_p_cp) sample_size = min(max_nodes, len(G_p_cp.nodes())) rand_nodes_p = random.sample(G_p_cp.nodes(), sample_size) rand_nodes_p_set = set(rand_nodes_p) control_points_prop = [] for n in rand_nodes_p: u_x, u_y = G_p_cp.nodes[n]['x'], G_p_cp.nodes[n]['y'] control_points_prop.append([n, u_x, u_y]) # add travel time G_p_cp = add_travel_time(G_p_cp, speed_key=speed_key, travel_time_key=travel_time_key) # get paths # gather all paths from nodes of interest, keep only routes to control nodes tt = time.time() if verbose: print("Computing all_pairs_lengths_prop_native...") all_pairs_lengths_prop_native = {} for itmp, source in enumerate(rand_nodes_p): if verbose and ((itmp % 50) == 0): print((itmp, "source:", source)) paths_tmp = nx.single_source_dijkstra_path_length( G_p_cp, source, weight=weight) # delete items for k in list(paths_tmp.keys()): if k not in rand_nodes_p_set: del paths_tmp[k] all_pairs_lengths_prop_native[source] = paths_tmp if verbose: print(("Time to compute all source routes for", max_nodes, "nodes:", time.time() - tt, "seconds")) ############### # insert gt control points into proposal if verbose: print("Inserting", len(control_points_gt), "control points into G_p...") print("len G_p.nodes():", len(G_p.nodes())) G_p_cp_prime, xn_p, yn_p = insert_control_points( G_p.copy(), control_points_gt, max_distance_meters=max_snap_dist, allow_renaming=allow_renaming, verbose=super_verbose) # add travel time G_p_cp_prime = add_travel_time(G_p_cp_prime, speed_key=speed_key, travel_time_key=travel_time_key) ############### # now insert control points into ground truth if verbose: print("\nInserting", len(control_points_prop), "control points into G_gt...") # permit renaming of inserted nodes if coincident with existing node G_gt_cp_prime, xn_gt, yn_gt = insert_control_points( G_gt, control_points_prop, max_distance_meters=max_snap_dist, allow_renaming=allow_renaming, verbose=super_verbose) G_gt_cp_prime = add_travel_time(G_gt_cp_prime, speed_key=speed_key, travel_time_key=travel_time_key) ############### # get paths for graphs_prime # gather all paths from nodes of interest, keep only routes to control nodes # gt_prime tt = time.time() all_pairs_lengths_gt_prime = {} if verbose: print("Computing all_pairs_lengths_gt_prime...") G_gt_cp_prime_nodes_set = set(G_gt_cp_prime.nodes()) for itmp, source in enumerate(rand_nodes_p_set): if verbose and ((itmp % 50) == 0): print((itmp, "source:", source)) if source in G_gt_cp_prime_nodes_set: paths_tmp = nx.single_source_dijkstra_path_length( G_gt_cp_prime, source, weight=weight) # delete items for k in list(paths_tmp.keys()): if k not in rand_nodes_p_set: del paths_tmp[k] all_pairs_lengths_gt_prime[source] = paths_tmp if verbose: print(("Time to compute all source routes for", max_nodes, "nodes:", time.time() - tt, "seconds")) # prop_prime tt = time.time() all_pairs_lengths_prop_prime = {} if verbose: print("Computing all_pairs_lengths_prop_prime...") G_p_cp_prime_nodes_set = set(G_p_cp_prime.nodes()) for itmp, source in enumerate(rand_nodes_gt_set): if verbose and ((itmp % 50) == 0): print((itmp, "source:", source)) if source in G_p_cp_prime_nodes_set: paths_tmp = nx.single_source_dijkstra_path_length( G_p_cp_prime, source, weight=weight) # delete items for k in list(paths_tmp.keys()): if k not in rand_nodes_gt_set: del paths_tmp[k] all_pairs_lengths_prop_prime[source] = paths_tmp if verbose: print(("Time to compute all source routes for", max_nodes, "nodes:", time.time() - tt, "seconds")) ############### tf = time.time() if verbose: print("Time to run make_graphs_yuge in apls.py:", tf - t0, "seconds") return G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime ################################################################################ def single_path_metric(len_gt, len_prop, diff_max=1): """ Compute APLS metric for single path. Notes ----- Compute normalize path difference metric, if len_prop < 0, return diff_max Arguments --------- len_gt : float Length of ground truth edge. len_prop : float Length of proposal edge. diff_max : float Maximum value to return. Defaults to ``1``. Returns ------- metric : float Normalized path difference. """ if len_gt <= 0: return 0 elif len_prop < 0 and len_gt > 0: return diff_max else: diff_raw = np.abs(len_gt - len_prop) / len_gt return np.min([diff_max, diff_raw]) ################################################################################ def path_sim_metric(all_pairs_lengths_gt, all_pairs_lengths_prop, control_nodes=[], min_path_length=10, diff_max=1, missing_path_len=-1, normalize=True, verbose=False): """ Compute metric for multiple paths. Notes ----- Assume nodes in ground truth and proposed graph have the same names. Assume graph is undirected so don't evaluate routes in both directions control_nodes is the list of nodes to actually evaluate; if empty do all in all_pairs_lenghts_gt min_path_length is the minimum path length to evaluate https://networkx.github.io/documentation/networkx-2.2/reference/algorithms/shortest_paths.html Parameters ---------- all_pairs_lengths_gt : dict Dictionary of path lengths for ground truth graph. all_pairs_lengths_prop : dict Dictionary of path lengths for proposal graph. control_nodes : list List of control nodes to evaluate. min_path_length : float Minimum path length to evaluate. diff_max : float Maximum value to return. Defaults to ``1``. missing_path_len : float Value to assign a missing path. Defaults to ``-1``. normalize : boolean Switch to normalize outputs. Defaults to ``True``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. Returns ------- C, diffs, routes, diff_dic C is the APLS score diffs is a list of the the route differences routes is a list of routes diff_dic is a dictionary of path differences """ diffs = [] routes = [] diff_dic = {} gt_start_nodes_set = set(all_pairs_lengths_gt.keys()) prop_start_nodes_set = set(all_pairs_lengths_prop.keys()) t0 = time.time() if len(gt_start_nodes_set) == 0: return 0, [], [], {} # set nodes to inspect if len(control_nodes) == 0: good_nodes = list(all_pairs_lengths_gt.keys()) else: good_nodes = control_nodes if verbose: print("\nComputing path_sim_metric()...") print("good_nodes:", good_nodes) # iterate overall start nodes for start_node in good_nodes: if verbose: print("start node:", start_node) node_dic_tmp = {} # if we are not careful with control nodes, it's possible that the # start node will not be in all_pairs_lengths_gt, in this case use max # diff for all routes to that node # if the start node is missing from proposal, use maximum diff for # all possible routes to that node if start_node not in gt_start_nodes_set: for end_node, len_prop in all_pairs_lengths_prop[start_node].items(): diffs.append(diff_max) routes.append([start_node, end_node]) node_dic_tmp[end_node] = diff_max return paths = all_pairs_lengths_gt[start_node] # CASE 1 # if the start node is missing from proposal, use maximum diff for # all possible routes to the start node if start_node not in prop_start_nodes_set: for end_node, len_gt in paths.items(): if (end_node != start_node) and (end_node in good_nodes): diffs.append(diff_max) routes.append([start_node, end_node]) node_dic_tmp[end_node] = diff_max diff_dic[start_node] = node_dic_tmp continue # else get proposed paths else: paths_prop = all_pairs_lengths_prop[start_node] # get set of all nodes in paths_prop, and missing_nodes end_nodes_gt_set = set(paths.keys()).intersection(good_nodes) end_nodes_prop_set = set(paths_prop.keys()) missing_nodes = end_nodes_gt_set - end_nodes_prop_set if verbose: print("missing nodes:", missing_nodes) # iterate over all paths from node for end_node in end_nodes_gt_set: len_gt = paths[end_node] # skip if too short if len_gt < min_path_length: continue # get proposed path if end_node in end_nodes_prop_set: # CASE 2, end_node in both paths and paths_prop, so # valid path exists len_prop = paths_prop[end_node] else: # CASE 3: end_node in paths but not paths_prop, so assign # length as diff_max len_prop = missing_path_len if verbose: print("end_node:", end_node) print(" len_gt:", len_gt) print(" len_prop:", len_prop) # compute path difference metric diff = single_path_metric(len_gt, len_prop, diff_max=diff_max) diffs.append(diff) routes.append([start_node, end_node]) node_dic_tmp[end_node] = diff diff_dic[start_node] = node_dic_tmp if len(diffs) == 0: return 0, [], [], {} # compute Cost diff_tot = np.sum(diffs) if normalize: norm = len(diffs) diff_norm = diff_tot / norm C = 1. - diff_norm else: C = diff_tot if verbose: print("Time to compute metric (score = ", C, ") for ", len(diffs), "routes:", time.time() - t0, "seconds") return C, diffs, routes, diff_dic ################################################################################ def compute_apls_metric(all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime, control_points_gt, control_points_prop, min_path_length=10, verbose=False, super_verbose=False): """ Compute APLS metric Notes ----- Computes APLS Arguments --------- all_pairs_lengths_gt_native : dict Dict of paths for gt graph. all_pairs_lengths_prop_native : dict Dict of paths for prop graph. all_pairs_lengths_gt_prime : dict Dict of paths for gt graph with control points from prop. all_pairs_lengths_prop_prime : dict Dict of paths for prop graph with control points from gt. control_points_gt : list Array of control points. control_points_prop : list Array of control points. min_path_length : float Minimum path length to evaluate. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Returns ------- C_tot, C_gt_onto_prop, C_prop_onto_gt : tuple C_tot is the total APLS score C_gt_onto_prop is the score when inserting gt control nodes onto prop C_prop_onto_gt is the score when inserting prop control nodes onto gt """ t0 = time.time() # return 0 if no paths if (len(list(all_pairs_lengths_gt_native.keys())) == 0) \ or (len(list(all_pairs_lengths_prop_native.keys())) == 0): if verbose: print("len(all_pairs_lengths_gt_native.keys()) == 0)") return 0, 0, 0 #################### # compute metric (gt to prop) control_nodes = [z[0] for z in control_points_gt] if verbose: print(("control_nodes_gt:", control_nodes)) C_gt_onto_prop, diffs, routes, diff_dic = path_sim_metric( all_pairs_lengths_gt_native, all_pairs_lengths_prop_prime, control_nodes=control_nodes, min_path_length=min_path_length, diff_max=1, missing_path_len=-1, normalize=True, verbose=super_verbose) dt1 = time.time() - t0 if verbose: print("len(diffs):", len(diffs)) if len(diffs) > 0: print(" max(diffs):", np.max(diffs)) print(" min(diffs)", np.min(diffs)) #################### # compute metric (prop to gt) t1 = time.time() control_nodes = [z[0] for z in control_points_prop] if verbose: print("control_nodes:", control_nodes) C_prop_onto_gt, diffs, routes, diff_dic = path_sim_metric( all_pairs_lengths_prop_native, all_pairs_lengths_gt_prime, control_nodes=control_nodes, min_path_length=min_path_length, diff_max=1, missing_path_len=-1, normalize=True, verbose=super_verbose) dt2 = time.time() - t1 if verbose: print("len(diffs):", len(diffs)) if len(diffs) > 0: print(" max(diffs):", np.max(diffs)) print(" min(diffs)", np.min(diffs)) #################### # Total if (C_gt_onto_prop <= 0) or (C_prop_onto_gt <= 0) \ or (np.isnan(C_gt_onto_prop)) or (np.isnan(C_prop_onto_gt)): C_tot = 0 else: C_tot = scipy.stats.hmean([C_gt_onto_prop, C_prop_onto_gt]) if np.isnan(C_tot): C_tot = 0 # print("Total APLS Metric = Mean(", np.round(C_gt_onto_prop, 2), "+", # np.round(C_prop_onto_gt, 2), # ") =", np.round(C_tot, 2)) return C_tot, C_gt_onto_prop, C_prop_onto_gt ################################################################################ def gather_files(truth_dir, prop_dir, im_dir='', max_files=1000, gt_subgraph_filter_weight='length', gt_min_subgraph_length=5, speed_key='inferred_speed_mps', travel_time_key='travel_time_s', verbose=False,\ n_threads=12): """ Build lists of ground truth and proposal graphs Arguments --------- truth_dir : str Location of ground truth graphs. prop_dir : str Location of proposal graphs. im_dir : str Location of image files. Defaults to ``''``. max_files : int Maximum number of files to analyze. Defaults to ``1000``. gt_subgraph_filter_weight : str Edge key for filtering ground truth edge length. Defaults to ``'length'``. gt_min_subgraph_length : float Minimum length of the edge. Defaults to ``5``. speed_key : str Edge key for speed. Defaults to ``'inferred_speed_mps'``. travel_time_key : str Edge key for travel time. Defaults to ``'travel_time_s'``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Returns ------- gt_list, gt_raw_list, gp_list, root_list, im_loc_list : tuple gt_list is a list of ground truth graphs. gp_list is a list of proposal graphs root_list is a list of names im_loc_list is the location of the images corresponding to root_list """ def get_file_by_id(id, dir, ext): """Get filename from {dir} by image {id} with certain {ext}ension.""" file_list = [f for f in os.listdir(dir) if f.endswith(id+ext)] if len(file_list) == 0: # raise ValueError(f'img id {id} not found in dir {dir}') return None elif len(file_list) > 1: raise ValueError(f'Duplicated img id {id} in dir {dir}', f'filename list: {file_list}') return file_list[0] ################### gt_list, gp_list, root_list, im_loc_list = [], [], [], [] ################### # use ground truth spacenet geojsons, and submission pkl files valid_road_types = set([]) # assume no road type in geojsons name_list = [f for f in os.listdir(truth_dir) if f.endswith('.geojson')] # truncate until max_files name_list = name_list[:max_files] i_list = list(range(len(name_list))) if n_threads is not None: n_threads = min(n_threads, len(name_list)) print(f"Checking valid scoring pairs from {len(name_list)} ground truths ...") # for i, f in tqdm(enumerate(name_list), total=len(name_list)): def get_valid_pairs(i, f): '''Helper function for parallel multi-processing. i : int index of enumerate(name_list) f : str filename from truth_dir, element in name_list ''' # skip non-geojson files if not f.endswith('.geojson'): return None, None, None, None # ground-truth file gt_file = os.path.join(truth_dir, f) imgid = f.split('.')[0].split('_')[-1] # in 'img???' format # reference image file im_file = get_file_by_id(imgid, im_dir, '.tif') if im_file is None: return None, None, None, None outroot = im_file.split('.')[0] im_file = os.path.join(im_dir, im_file) # proposal file prop_file = get_file_by_id(imgid, prop_dir, '.gpickle') if prop_file is None: return None, None, None, None prop_file = os.path.join(prop_dir, prop_file) ######### # ground truth osmidx, osmNodeidx = 10000, 10000 G_gt_init, G_gt_raw = \ _create_gt_graph(gt_file, im_file, network_type='all_private', valid_road_types=valid_road_types, subgraph_filter_weight=gt_subgraph_filter_weight, min_subgraph_length=gt_min_subgraph_length, osmidx=osmidx, osmNodeidx=osmNodeidx, speed_key=speed_key, travel_time_key=travel_time_key, verbose=verbose) # # skip empty ground truth graphs # if len(G_gt_init.nodes()) == 0: # continue if verbose: # print a node node = list(G_gt_init.nodes())[-1] print(node, "gt random node props:", G_gt_init.nodes[node]) # print an edge edge_tmp = list(G_gt_init.edges())[-1] try: props = G_gt_init.edges[edge_tmp[0], edge_tmp[1], 0] except: props = G_gt_init.edges[edge_tmp[0], edge_tmp[1], "0"] print("gt random edge props for edge:", edge_tmp, " = ", props) ######### # proposal G_p_init = nx.read_gpickle(prop_file) # print a few values if verbose: # print a node try: node = list(G_p_init.nodes())[-1] print(node, "prop random node props:", G_p_init.nodes[node]) # print an edge edge_tmp = list(G_p_init.edges())[-1] print("prop random edge props for edge:", edge_tmp, " = ", G_p_init.edges[edge_tmp[0], edge_tmp[1], 0]) except: print("Empty proposal graph") # return (map to reduce) return G_gt_init, G_p_init, outroot, im_file # Multiprocessing to accelerate the gathering process. if n_threads is None: print("Running in parallel using all threads ...") else: print("Running in parallel using {} threads ...".format(n_threads)) map_reduce_res = p_umap(get_valid_pairs, i_list, name_list, num_cpus=n_threads) unzipped = list(zip(*map_reduce_res)) # distribute result lists def filter_none(l): return [x for x in l if x is not None] gt_list = filter_none(unzipped[0]) gp_list = filter_none(unzipped[1]) root_list = filter_none(unzipped[2]) im_loc_list = filter_none(unzipped[3]) return gt_list, gp_list, root_list, im_loc_list ############################################################################### def execute(output_dir, gt_list, gp_list, root_list, weight='length', speed_key='inferred_speed_mps', travel_time_key='travel_time_s', max_files=1000, linestring_delta=50, is_curved_eps=10**3, max_snap_dist=4, max_nodes=500, min_path_length=10, allow_renaming=True, verbose=True, super_verbose=False, n_threads=12): """ Compute APLS for the input data in gt_list, gp_list Arguments --------- output_dir: str dir to write output files into. weight : str Edge key determining path length weights. Defaults to ``'length'``. speed_key : str Edge key for speed. Defaults to ``'inferred_speed_mps'``. travel_time_key : str Edge key for travel time. Defaults to ``'travel_time_s'``. max_files : int Maximum number of files to analyze. Defaults to ``1000``. linestring_delta : float Distance in meters between linestring midpoints. Defaults to ``50``. is_curved_eps : float Minumum curvature for injecting nodes (if curvature is less than this value, no midpoints will be injected). If < 0, always inject points on line, regardless of curvature. Defaults to ``0.3``. max_snap_dist : float Maximum distance a node can be snapped onto a graph. Defaults to ``4``. max_nodes : int Maximum number of gt nodes to inject midpoints. If there are more gt nodes than this, skip midpoints and use this number of points to comput APLS. min_path_length : float Mimumum path length to consider for APLS. Defaults to ``10``. allow_renaming : boolean Switch to rename nodes when injecting nodes into graphs. Defaulst to ``True``. verbose : boolean Switch to print relevant values to screen. Defaults to ``False``. super_verbose : boolean Switch to print mucho values to screen. Defaults to ``False``. Returns ------- None """ # now compute results C_arr = [["outroot", "APLS", "APLS_gt_onto_prop", "APLS_prop_onto_gt"]] # make dirs os.makedirs(output_dir, exist_ok=True) ################## t0 = time.time() # truncate until max_files root_list = root_list[:max_files] gt_list = gt_list[:max_files] gp_list = gp_list[:max_files] if n_threads is not None: n_threads = min(n_threads, len(root_list)) print(f'Computing scores for {len(root_list)} pairs in total ...') # for i, [outroot, G_gt_init, G_p_init] in tqdm( # enumerate(zip(root_list, gt_list, gp_list)), total=len(root_list)): def compute_score_arr(outroot, G_gt_init, G_p_init): # get graphs with midpoints and geometry (if small graph) if len(G_gt_init.nodes()) < 500: # 2000: G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime \ = make_graphs(G_gt_init, G_p_init, weight=weight, speed_key=speed_key, travel_time_key=travel_time_key, linestring_delta=linestring_delta, is_curved_eps=is_curved_eps, max_snap_dist=max_snap_dist, allow_renaming=allow_renaming, verbose=verbose) # get large graphs and paths else: G_gt_cp, G_p_cp, G_gt_cp_prime, G_p_cp_prime, \ control_points_gt, control_points_prop, \ all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, \ all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime \ = make_graphs_yuge(G_gt_init, G_p_init, weight=weight, speed_key=speed_key, travel_time_key=travel_time_key, max_nodes=max_nodes, max_snap_dist=max_snap_dist, allow_renaming=allow_renaming, verbose=verbose, super_verbose=super_verbose) if verbose: print("\nlen control_points_gt:", len(control_points_gt)) if len(G_gt_init.nodes()) < 200: print("G_gt_init.nodes():", G_gt_init.nodes()) print("len G_gt_init.edges():", len(G_gt_init.edges())) if len(G_gt_cp.nodes()) < 200: print("G_gt_cp.nodes():", G_gt_cp.nodes()) print("len G_gt_cp.nodes():", len(G_gt_cp.nodes())) print("len G_gt_cp.edges():", len(G_gt_cp.edges())) print("len G_gt_cp_prime.nodes():", len(G_gt_cp_prime.nodes())) print("len G_gt_cp_prime.edges():", len(G_gt_cp_prime.edges())) print("\nlen control_points_prop:", len(control_points_prop)) if len(G_p_init.nodes()) < 200: print("G_p_init.nodes():", G_p_init.nodes()) print("len G_p_init.edges():", len(G_p_init.edges())) if len(G_p_cp.nodes()) < 200: print("G_p_cp.nodes():", G_p_cp.nodes()) print("len G_p_cp.nodes():", len(G_p_cp.nodes())) print("len G_p_cp.edges():", len(G_p_cp.edges())) print("len G_p_cp_prime.nodes():", len(G_p_cp_prime.nodes())) if len(G_p_cp_prime.nodes()) < 200: print("G_p_cp_prime.nodes():", G_p_cp_prime.nodes()) print("len G_p_cp_prime.edges():", len(G_p_cp_prime.edges())) print("len all_pairs_lengths_gt_native:", len(dict(all_pairs_lengths_gt_native))) print("len all_pairs_lengths_gt_prime:", len(dict(all_pairs_lengths_gt_prime))) print("len all_pairs_lengths_prop_native", len(dict(all_pairs_lengths_prop_native))) print("len all_pairs_lengths_prop_prime", len(dict(all_pairs_lengths_prop_prime))) ######################### # Metric C, C_gt_onto_prop, C_prop_onto_gt = compute_apls_metric( all_pairs_lengths_gt_native, all_pairs_lengths_prop_native, all_pairs_lengths_gt_prime, all_pairs_lengths_prop_prime, control_points_gt, control_points_prop, min_path_length=min_path_length, verbose=verbose) # C_arr.append([outroot, C, C_gt_onto_prop, C_prop_onto_gt]) return [outroot, C, C_gt_onto_prop, C_prop_onto_gt] # Multiprocessing to accelerate the scoring process. if n_threads is None: print("Running in parallel using all threads ...") else: print("Running in parallel using {} threads ...".format(n_threads)) map_reduce_res = p_umap(compute_score_arr, root_list, gt_list, gp_list, num_cpus=n_threads) C_arr += map_reduce_res # append results below header # print and save total cost tf = time.time() if verbose: print(("Time to compute metric:", tf - t0, "seconds")) print(("N input images:", len(root_list))) # save to csv path_csv = os.path.join(output_dir, 'scores_weight='+str(weight)+'.csv') df = pd.DataFrame(C_arr[1:], columns=C_arr[0]) df.to_csv(path_csv) print("Weight is " + str(weight)) print("Mean APLS = ", np.mean(df['APLS'].values)) ################################################################################ def main(): parser = argparse.ArgumentParser() parser.add_argument('--output_dir', default='./results', type=str, help='Dir path to write output files into') parser.add_argument('--truth_dir', default='', type=str, help='Location of ground truth graphs') parser.add_argument('--prop_dir', default='', type=str, help='Location of proposal graphs') parser.add_argument('--im_dir', default='', type=str, help='Location of images (optional)') parser.add_argument('--max_snap_dist', default=4, type=int, help='Buffer distance (meters) around graph') parser.add_argument('--linestring_delta', default=50, type=int, help='Distance between midpoints on edges') parser.add_argument('--is_curved_eps', default=-1, type=float, help='Line curvature above which midpoints will be' ' injected, (< 0 to inject midpoints on straight' ' lines). 0.12 is a good value if not all lines are ' ' to be used') parser.add_argument('--min_path_length', default=0.001, type=float, help='Minimum path length to consider for metric') parser.add_argument('--max_nodes', default=1000, type=int, help='Maximum number of nodes to compare for APLS' ' metric') parser.add_argument('--max_files', default=1000, type=int, help='Maximum number of graphs to analyze') parser.add_argument('--weight', default='length', type=str, help='Weight for APLS metric [length, travel_time_s') parser.add_argument('--speed_key', default='inferred_speed_mps', type=str, help='Key in edge properties for speed') parser.add_argument('--travel_time_key', default='travel_time_s', type=str, help='Key in edge properties for travel_time') parser.add_argument('--allow_renaming', default=1, type=int, help='Switch to rename nodes. Defaults to 1 (True)') parser.add_argument('--n_threads', default=None, type=int, help='desired number of threads for multi-proc') args = parser.parse_args() # Filtering parameters (shouldn't need changed) args.gt_subgraph_filter_weight = 'length' args.gt_min_subgraph_length = 5 args.prop_subgraph_filter_weight = 'length_pix' args.prop_min_subgraph_length = 10 # GSD = 0.3 # general settings verbose = False super_verbose = False # Gather files gt_list, gp_list, root_list, _ = gather_files( args.truth_dir, args.prop_dir, im_dir=args.im_dir, max_files=args.max_files, gt_subgraph_filter_weight=args.gt_subgraph_filter_weight, gt_min_subgraph_length=args.gt_min_subgraph_length, speed_key=args.speed_key, travel_time_key=args.travel_time_key, verbose=verbose, n_threads=args.n_threads) # Compute execute( args.output_dir, gt_list, gp_list, root_list, weight=args.weight, speed_key=args.speed_key, travel_time_key=args.travel_time_key, max_files=args.max_files, linestring_delta=args.linestring_delta, is_curved_eps=args.is_curved_eps, max_snap_dist=args.max_snap_dist, max_nodes=args.max_nodes, min_path_length=args.min_path_length, allow_renaming=bool(args.allow_renaming), verbose=verbose, super_verbose=super_verbose, n_threads=args.n_threads) if __name__ == "__main__": main()