Merge pull request #1317 from gboeing/validate

ENH: add graph validation function
This commit is contained in:
Geoff Boeing
2025-12-15 16:44:51 -08:00
committed by GitHub
9 changed files with 579 additions and 94 deletions

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@
.pytest_cache
*.vrt
.DS_Store
.python-version
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -2,8 +2,9 @@
## 2.1.0 (TBD)
- add Python 3.14 support
- drop Python 3.9 and 3.10 support
- add Python 3.14 support (#1336)
- drop Python 3.9 and 3.10 support (#1322 #1336)
- add validation functions to verify that a graph or GeoDataFrame satisfies OSMnx expectations (#1317)
## 2.0.7 (2025-11-25)
@@ -25,7 +26,7 @@
## 2.0.4 (2025-06-11)
- fix bug in features module when elements have pre-existing geometry tags (#1298)
- fix bug in save_graphml function where gephi compatibility mode erases node attributes (#1300)
- fix bug in save_graphml function where Gephi compatibility mode erases node attributes (#1300)
- bump minimum required minor versions of optional extras to earliest versions with linux/amd64 wheels (#1296)
## 2.0.3 (2025-05-06)

View File

@@ -101,7 +101,7 @@ Convert, Project, Save
OSMnx's ``convert`` module can convert a MultiDiGraph to a `DiGraph`_ if you prefer a directed representation of the network without any parallel edges, or to a `MultiGraph`_ if you need an undirected representation for use with functions or algorithms that only accept a MultiGraph object. If you just want a fully bidirectional graph (such as for a walking network), just configure the ``settings`` module's ``bidirectional_network_types`` before creating your graph.
The ``convert`` module can also convert a MultiDiGraph to/from GeoPandas node and edge `GeoDataFrames`_. The nodes GeoDataFrame is indexed by OSM ID and the edges GeoDataFrame is multi-indexed by ``u, v, key`` just like a NetworkX edge. This allows you to load arbitrary node/edge ShapeFiles or GeoPackage layers as GeoDataFrames then model them as a MultiDiGraph for graph analysis. Read more about the :ref:`convert <osmnx-convert-module>` module in the User Reference.
The ``convert`` module can also convert a MultiDiGraph to/from GeoPandas node and edge `GeoDataFrames`_. The nodes GeoDataFrame is indexed by OSM ID and the edges GeoDataFrame is multi-indexed by ``u, v, key`` just like a NetworkX edge. This allows you to load arbitrary node/edge ShapeFiles or GeoPackage layers as GeoDataFrames then model them as a MultiDiGraph for graph analysis. The ``convert`` module exposes validation functions to verify that your MultiDiGraph or GeoDataFrames satisfy OSMnx requirements. Read more about the :ref:`convert <osmnx-convert-module>` module in the User Reference.
You can easily project your graph to different coordinate reference systems using the ``projection`` module. If you're unsure which `CRS`_ you want to project to, OSMnx can automatically determine an appropriate UTM CRS for you. Read more about the :ref:`projection <osmnx-projection-module>` module in the User Reference.

View File

@@ -186,3 +186,11 @@ osmnx.utils_geo module
:members:
:private-members:
:noindex:
osmnx._validate module
----------------------
.. automodule:: osmnx._validate
:members:
:private-members:
:noindex:

View File

@@ -9,6 +9,10 @@ class GraphSimplificationError(ValueError):
"""Exception for a problem with graph simplification."""
class ValidationError(ValueError):
    """
    Exception for failed graph or node/edge GeoDataFrame validation.

    Subclasses `ValueError`, so handlers that catch `ValueError` also catch
    this exception.
    """
class InsufficientResponseError(ValueError):
"""Exception for empty or too few results in server response."""

387
osmnx/_validate.py Normal file
View File

@@ -0,0 +1,387 @@
"""Validate that graphs and GeoDataFrames satisfy OSMnx expectations."""
from __future__ import annotations
import logging as lg
from numbers import Real
from warnings import warn
import geopandas as gpd
import networkx as nx
import numpy as np
from ._errors import ValidationError
from .utils import log
def _verify_numeric_edge_attribute(G: nx.MultiDiGraph, attr: str, *, strict: bool = True) -> None:
    """
    Verify attribute values are numeric and non-null across graph edges.

    Raises a `ValidationError` if this attribute contains non-numeric
    values. If this attribute is missing or null on any edges, issues a
    `UserWarning` when `strict` is `False`, or raises a `ValidationError`
    when `strict` is `True`. A graph without any edges has no values to
    check and passes verification.

    Parameters
    ----------
    G
        Input graph.
    attr
        Name of the edge attribute to verify.
    strict
        If `True`, elevate warnings to errors.
    """
    is_valid = True
    # must be an f-string so the attribute name is interpolated into the log
    valid_msg = f"Verified {attr!r} values are numeric and non-null across graph edges."
    warn_msg = ""
    err_msg = ""

    # each item is a (u, v, value) tuple, where value is None if attr is missing
    edge_values = tuple(G.edges(data=attr))

    # skip the array check on an edgeless graph: the resulting empty array is
    # 1-dimensional, so selecting its third column would raise an IndexError
    if edge_values:
        try:
            values_float = (np.array(edge_values)[:, 2]).astype(float)
            if np.isnan(values_float).any():
                warn_msg += f"The attribute {attr!r} is missing or null on some edges."
                if strict:
                    is_valid = False
        except ValueError:
            err_msg += f"The edge attribute {attr!r} contains non-numeric values."
            is_valid = False

    _report_validation(is_valid, valid_msg, warn_msg, err_msg)
def _validate_features_gdf(gdf: gpd.GeoDataFrame) -> None:
    """
    Validate that features GeoDataFrame satisfies OSMnx expectations.

    Every check runs before the result is reported, so a single
    `ValidationError` lists all of the problems that were found.

    Raises a `ValidationError` if validation fails.

    Parameters
    ----------
    gdf
        GeoDataFrame of features uniquely multi-indexed by
        `(element_type, osmid)`.
    """
    is_valid = True
    valid_msg = "Validated features GeoDataFrame."
    warn_msg = ""
    err_msg = ""

    # ensure gdf is uniquely indexed
    if not gdf.index.is_unique:
        err_msg += "`gdf` must be uniquely indexed. "
        is_valid = False

    # ensure gdf is multi-indexed with 2 levels (element_type and osmid) and
    # that the element types are all either node, way, or relation
    features_index_levels = 2
    check1 = gdf.index.nlevels == features_index_levels
    element_types = set(gdf.index.get_level_values(0))
    check2 = element_types.issubset({"node", "way", "relation"})
    if not (check1 and check2):
        err_msg += "`gdf` must be multi-indexed by `(element_type, osmid)`. "
        is_valid = False

    # ensure gdf has an active geometry column with all valid non-null geoms;
    # the `or` short-circuits so `gdf.geometry` is only touched if it exists
    if (gdf.active_geometry_name is None) or (
        gdf.geometry.isna() | gdf.geometry.is_empty | ~gdf.geometry.is_valid
    ).any():
        err_msg += "`gdf` must contain valid, non-null geometries. "
        is_valid = False

    _report_validation(is_valid, valid_msg, warn_msg, err_msg)
def _validate_node_edge_gdfs(
    gdf_nodes: gpd.GeoDataFrame,
    gdf_edges: gpd.GeoDataFrame,
    *,
    strict: bool = True,
) -> None:
    """
    Validate that node/edge GeoDataFrames can be converted to a MultiDiGraph.

    Every check runs before the result is reported, so a single
    `ValidationError` lists all of the problems that were found.

    Raises a `ValidationError` if validation fails.

    Parameters
    ----------
    gdf_nodes
        GeoDataFrame of graph nodes uniquely indexed by `osmid`.
    gdf_edges
        GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.
    strict
        If `True`, elevate warnings to errors. The only warning here is for
        node geometries that differ from the 'x' and 'y' column values.
    """
    is_valid = True
    valid_msg = "Validated that node/edge GeoDataFrames can be converted to a MultiDiGraph."
    warn_msg = ""
    err_msg = ""

    # ensure type is GeoDataFrame
    if not (isinstance(gdf_nodes, gpd.GeoDataFrame) and isinstance(gdf_edges, gpd.GeoDataFrame)):
        # if they are not both GeoDataFrames
        err_msg += "`gdf_nodes` and `gdf_edges` must be GeoDataFrames. "
        is_valid = False

    # if they are both GeoDataFrames...
    # warn user if geometry values differ from coordinates in x/y columns,
    # because we ignore the geometry column
    elif gdf_nodes.active_geometry_name is not None:
        msg = (
            "Will ignore the `gdf_nodes` 'geometry' column, though its values "
            "differ from the coordinates in the 'x' and 'y' columns. "
        )
        try:
            all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all()
            all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all()
            if not (all_x_match and all_y_match):
                # warn if x/y coords don't match geometry column
                warn_msg += msg
                if strict:
                    is_valid = False
        except KeyError:
            # the 'x' and/or 'y' column is missing, so there is nothing to
            # compare the geometries against: do not crash here, because the
            # missing columns are reported as a validation error below
            pass
        except ValueError:
            # warn if geometry column contains non-point geometry types
            warn_msg += msg
            if strict:
                is_valid = False

    # ensure gdf_nodes has x and y columns representing node geometries
    if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns):
        err_msg += "`gdf_nodes` must have 'x' and 'y' columns. "
        is_valid = False

    # ensure gdf_nodes and gdf_edges are uniquely indexed
    if not (gdf_nodes.index.is_unique and gdf_edges.index.is_unique):
        err_msg += "`gdf_nodes` and `gdf_edges` must each be uniquely indexed. "
        is_valid = False

    # ensure 1) gdf_edges are multi-indexed with 3 levels and 2) that its u
    # and v values (first two index levels) all appear among gdf_nodes index
    edges_index_levels = 3
    check1 = gdf_edges.index.nlevels == edges_index_levels
    try:
        uv = set(gdf_edges.index.get_level_values(0)) | set(gdf_edges.index.get_level_values(1))
        check2 = uv.issubset(set(gdf_nodes.index))
    except IndexError:
        # the edges index has no second level, so it cannot be (u, v, key)
        check2 = False
    if not (check1 and check2):
        err_msg += "`gdf_edges` must be multi-indexed by `(u, v, key)`. "
        is_valid = False

    _report_validation(is_valid, valid_msg, warn_msg, err_msg)
def _validate_nodes(G: nx.MultiDiGraph, strict: bool) -> tuple[bool, str, str]:  # noqa: FBT001
    """
    Validate that a graph's nodes satisfy OSMnx expectations.

    Required rules (at least 1 node, every node has 'x' and 'y') always
    invalidate the graph when broken. Optional rules (numeric 'x'/'y' values,
    'street_count' present, integer node IDs) produce warning text and only
    invalidate the graph if `strict` is `True`.

    Parameters
    ----------
    G
        The input graph.
    strict
        If `True`, elevate warnings to errors.

    Returns
    -------
    is_valid, err_msg, warn_msg
        Whether validation passed, plus any error or warning messages.
    """
    # assume nodes are valid but try to falsify that through a series of tests
    is_valid = True
    err_msg = ""
    warn_msg = ""

    # ERR: must have at least 1 node
    if not len(G.nodes) > 0:
        err_msg += "G must have at least 1 node. "
        is_valid = False

    # otherwise, it has at least 1 node, so validate the node attributes
    else:
        # materialize the nodes' attribute dicts once to reuse in every check
        node_data = list(G.nodes.values())

        # ERR: nodes must have "x" and "y" data attributes
        if not all("x" in d and "y" in d for d in node_data):
            err_msg += "Nodes must have 'x' and 'y' data attributes. "
            is_valid = False

        # WARN: nodes' "x" and "y" data attributes should be type Real. Only
        # type-check nodes that have the attribute, because nodes missing it
        # were already reported by the check above. Run this check once only,
        # so that the warning message is not repeated.
        valid_xs = all(isinstance(d["x"], Real) for d in node_data if "x" in d)
        valid_ys = all(isinstance(d["y"], Real) for d in node_data if "y" in d)
        if not (valid_xs and valid_ys):
            warn_msg += "Node 'x' and 'y' data attributes should be numeric. "
            if strict:
                is_valid = False

        # WARN: nodes should have "street_count" data attributes
        if not all("street_count" in d for d in node_data):
            warn_msg += "Nodes should have 'street_count' data attributes. "
            if strict:
                is_valid = False

        # WARN: node IDs should be type int
        if not all(isinstance(n, int) for n in G.nodes):
            warn_msg += "Node IDs should be type int. "
            if strict:
                is_valid = False

    return is_valid, err_msg, warn_msg
def _validate_edges(G: nx.MultiDiGraph, strict: bool) -> tuple[bool, str, str]:  # noqa: FBT001
    """
    Check a graph's edges against what OSMnx expects of them.

    A graph without edges, or with edges lacking 'osmid' or 'length', is
    always invalid. Unexpected 'osmid' or 'length' value types only add
    warning text, unless `strict` elevates them to failures.

    Parameters
    ----------
    G
        The input graph.
    strict
        If `True`, elevate warnings to errors.

    Returns
    -------
    is_valid, err_msg, warn_msg
        Whether validation passed, plus any error or warning messages.
    """
    # guard clause: without edges there are no edge attributes to inspect
    if not len(G.edges) > 0:
        return False, "G must have at least 1 edge. ", ""

    # one row per required attribute: name, accepted value types, the error
    # if any edge lacks it, and the warning if any value has the wrong type
    rules = (
        (
            "osmid",
            (int, list),
            "Edges must have 'osmid' data attributes. ",
            "Edge 'osmid' data attributes should be type `int` or `list[int]`. ",
        ),
        (
            "length",
            Real,
            "Edges must have 'length' data attributes. ",
            "Edge 'length' data attributes should be numeric. ",
        ),
    )

    passed = True
    errors = ""
    warnings_text = ""
    for attr_name, accepted_types, missing_text, type_text in rules:
        found = nx.get_edge_attributes(G, name=attr_name)

        # ERR: the attribute must be present on every edge
        if set(found) != set(G.edges):
            errors += missing_text
            passed = False

        # WARN: the values that are present should be of the accepted types
        if any(not isinstance(value, accepted_types) for value in found.values()):
            warnings_text += type_text
            if strict:
                passed = False

    return passed, errors, warnings_text
def _validate_graph_attrs(G: nx.MultiDiGraph) -> tuple[bool, str, str]:
    """
    Check a graph's type and graph-level attributes against OSMnx expectations.

    Every rule here is a hard requirement, so the returned warning message is
    always empty.

    Parameters
    ----------
    G
        The input graph.

    Returns
    -------
    is_valid, err_msg, warn_msg
        Whether validation passed, plus any error or warning messages.
    """
    # collect one message per broken rule: the graph is valid if none accrue
    problems = []

    # ERR: must be a NetworkX MultiDiGraph
    if not isinstance(G, nx.MultiDiGraph):
        problems.append("G must be a NetworkX MultiDiGraph. ")

    # ERR: must have top-level graph, nodes, and edges attributes
    if not all(hasattr(G, name) for name in ("graph", "nodes", "edges")):
        problems.append("G must have top-level graph, nodes, and edges attributes. ")

    # ERR: graph attr dict must have a "crs" key defining its CRS; fall back
    # to an empty dict so a missing `graph` attribute is handled the same way
    crs = getattr(G, "graph", {}).get("crs")
    if crs is None:
        problems.append("G.graph must have a 'crs' data attribute. ")
    else:
        # ERR: graph attr dict "crs" value must be a valid pyproj CRS
        try:
            _ = gpd.GeoSeries(crs=crs).crs
        except RuntimeError:  # RuntimeError is parent of pyproj CRSError
            problems.append("G.graph['crs'] must be a valid CRS. ")

    return not problems, "".join(problems), ""
def _validate_graph(G: nx.MultiDiGraph, *, strict: bool = True) -> None:
    """
    Check that a graph object meets OSMnx expectations.

    Runs the graph-level, node, and edge validators, then reports their
    combined outcome in one go.

    Raises `ox._errors.ValidationError` if validation fails.

    Parameters
    ----------
    G
        The input graph.
    strict
        If `True`, elevate warnings to errors.
    """
    # run all three validators, in this order, before reporting anything
    outcomes = (
        _validate_graph_attrs(G),
        _validate_nodes(G, strict),
        _validate_edges(G, strict),
    )

    # the graph is valid only if every part is; messages are concatenated
    all_valid = all(passed for passed, _, _ in outcomes)
    errors = "".join(err_text for _, err_text, _ in outcomes)
    warnings_text = "".join(warn_text for _, _, warn_text in outcomes)

    _report_validation(all_valid, "Successfully validated graph.", warnings_text, errors)
def _report_validation(is_valid: bool, valid_msg: str, warn_msg: str, err_msg: str) -> None:  # noqa: FBT001
    """
    Log the validation outcome, then warn or raise as appropriate.

    On failure, the error and warning text are combined, logged at ERROR
    level, and raised as a `ValidationError`. On success, the success message
    is logged at INFO level, and any warning text is logged at WARNING level
    and issued as a `UserWarning`.

    Parameters
    ----------
    is_valid
        Whether or not the validation succeeded.
    valid_msg
        The message to log if validation succeeded.
    warn_msg
        Any warning messages to log and either issue a warning or include in
        error message.
    err_msg
        Any error messages to include when raising exception if validation
        failed.
    """
    # failure path first: warnings ride along at the end of the error message
    if not is_valid:
        failure_text = err_msg + warn_msg
        log(failure_text, level=lg.ERROR)
        raise ValidationError(failure_text)

    log(valid_msg, level=lg.INFO)

    # nothing more to report if validation passed without any warnings
    if warn_msg == "":
        return

    log(warn_msg, level=lg.WARNING)
    warn(warn_msg, category=UserWarning, stacklevel=2)

View File

@@ -7,7 +7,6 @@ import logging as lg
from typing import Any
from typing import Literal
from typing import overload
from warnings import warn
import geopandas as gpd
import networkx as nx
@@ -15,9 +14,65 @@ import pandas as pd
from shapely import LineString
from shapely import Point
from . import _validate
from . import utils
def validate_graph(G: nx.MultiDiGraph, *, strict: bool = True) -> None:
    """
    Validate that a graph object satisfies OSMnx expectations.

    Raises `ox._errors.ValidationError` if validation fails.

    Parameters
    ----------
    G
        The input graph.
    strict
        If `True`, enforce optional rules in addition to required rules. These
        optional rules primarily enforce expected attribute data types.
    """
    _validate._validate_graph(G, strict=strict)
def validate_node_edge_gdfs(
    gdf_nodes: gpd.GeoDataFrame,
    gdf_edges: gpd.GeoDataFrame,
    *,
    strict: bool = True,
) -> None:
    """
    Check that node/edge GeoDataFrames are convertible to a MultiDiGraph.

    Public entry point that delegates to the `_validate` module.

    Raises a `ValidationError` if validation fails.

    Parameters
    ----------
    gdf_nodes
        GeoDataFrame of graph nodes uniquely indexed by `osmid`.
    gdf_edges
        GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.
    strict
        If `True`, elevate warnings to errors.
    """
    _validate._validate_node_edge_gdfs(gdf_nodes, gdf_edges, strict=strict)
def validate_features_gdf(gdf: gpd.GeoDataFrame) -> None:
    """
    Check that a features GeoDataFrame meets OSMnx expectations.

    Public entry point that delegates to the `_validate` module.

    Raises a `ValidationError` if validation fails.

    Parameters
    ----------
    gdf
        GeoDataFrame of features uniquely multi-indexed by
        `(element_type, osmid)`.
    """
    _validate._validate_features_gdf(gdf)
# nodes and edges are both missing (therefore both default true)
@overload
def graph_to_gdfs(
@@ -208,60 +263,6 @@ def graph_to_gdfs(
raise ValueError(msg)
def _validate_node_edge_gdfs(
    gdf_nodes: gpd.GeoDataFrame,
    gdf_edges: gpd.GeoDataFrame,
) -> None:
    """
    Check that node/edge GeoDataFrames are convertible to a MultiDiGraph.

    Raises a `ValueError` on the first failed check. Issues a `UserWarning`
    if the node geometries disagree with the 'x' and 'y' columns or are not
    all points.

    Parameters
    ----------
    gdf_nodes
        GeoDataFrame of graph nodes uniquely indexed by `osmid`.
    gdf_edges
        GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`.
    """
    # node geometries are represented by the x and y columns, so require both
    if "x" not in gdf_nodes.columns or "y" not in gdf_nodes.columns:  # pragma: no cover
        msg = "`gdf_nodes` must contain 'x' and 'y' columns."
        raise ValueError(msg)

    # both indexes must be free of duplicate entries
    if not gdf_nodes.index.is_unique or not gdf_edges.index.is_unique:  # pragma: no cover
        msg = "`gdf_nodes` and `gdf_edges` must each be uniquely indexed."
        raise ValueError(msg)

    # the edges index needs 3 levels, and every u and v value (its first two
    # levels) must be present in the nodes index
    edges_index_levels = 3
    has_three_levels = gdf_edges.index.nlevels == edges_index_levels
    endpoints = set(gdf_edges.index.get_level_values(0))
    endpoints |= set(gdf_edges.index.get_level_values(1))
    endpoints_known = endpoints.issubset(set(gdf_nodes.index))
    if not has_three_levels or not endpoints_known:  # pragma: no cover
        msg = "`gdf_edges` must be multi-indexed by `(u, v, key)`."
        raise ValueError(msg)

    # nothing left to compare if there is no active geometry column
    if gdf_nodes.active_geometry_name is None:
        return

    # the geometry column gets discarded, so tell the user if its values
    # disagree with the coordinates in the x/y columns
    msg = (
        "Discarding the `gdf_nodes` 'geometry' column, though its values "
        "differ from the coordinates in the 'x' and 'y' columns."
    )
    try:
        all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all()
        all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all()
        should_warn = not (all_x_match and all_y_match)
    except ValueError:  # pragma: no cover
        # geometry column contains non-point geometry types
        should_warn = True

    if should_warn:  # pragma: no cover
        warn(msg, category=UserWarning, stacklevel=2)
def graph_from_gdfs(
gdf_nodes: gpd.GeoDataFrame,
gdf_edges: gpd.GeoDataFrame,
@@ -299,7 +300,7 @@ def graph_from_gdfs(
G
The converted MultiDiGraph.
"""
_validate_node_edge_gdfs(gdf_nodes, gdf_edges)
validate_node_edge_gdfs(gdf_nodes, gdf_edges)
# drop geometry column from gdf_nodes (since we use x and y for geometry
# information), but warn the user if the geometry values differ from the

View File

@@ -12,12 +12,12 @@ from collections.abc import Iterator
from typing import TYPE_CHECKING
from typing import Any
from typing import overload
from warnings import warn
import networkx as nx
import numpy as np
import pandas as pd
from . import _validate
from . import convert
from . import utils
@@ -335,7 +335,7 @@ def shortest_path(
The node IDs constituting the shortest path, or, if `orig` and `dest`
are both iterable, then a list of such paths.
"""
_verify_edge_attribute(G, weight)
_validate._verify_numeric_edge_attribute(G, weight, strict=False)
# if neither orig nor dest is iterable, just return the shortest path
if not (isinstance(orig, Iterable) or isinstance(dest, Iterable)):
@@ -407,7 +407,7 @@ def k_shortest_paths(
path
The node IDs constituting the next-shortest path.
"""
_verify_edge_attribute(G, weight)
_validate._verify_numeric_edge_attribute(G, weight, strict=False)
paths_gen = nx.shortest_simple_paths(
G=convert.to_digraph(G, weight=weight),
source=orig,
@@ -454,30 +454,6 @@ def _single_shortest_path(
return None
def _verify_edge_attribute(G: nx.MultiDiGraph, attr: str) -> None:
    """
    Check that an edge attribute is numeric and non-null on every edge.

    Raises a ValueError if this attribute contains non-numeric values, and
    issues a UserWarning if this attribute is missing or null on any edges.

    Parameters
    ----------
    G
        Input graph.
    attr
        Name of the edge attribute to verify.
    """
    try:
        # each row is (u, v, value): the third column holds the attribute
        edge_rows = np.array(tuple(G.edges(data=attr)))
        attr_values = edge_rows[:, 2].astype(float)
    except ValueError as e:
        msg = f"The edge attribute {attr!r} contains non-numeric values."
        raise ValueError(msg) from e

    # missing or null values show up as NaN after the conversion to float
    has_nulls = np.isnan(attr_values).any()
    if has_nulls:
        msg = f"The attribute {attr!r} is missing or null on some edges."
        warn(msg, category=UserWarning, stacklevel=2)
def add_edge_speeds(
G: nx.MultiDiGraph,
*,

View File

@@ -76,17 +76,105 @@ def test_exceptions() -> None:
"""Test the custom errors."""
message = "testing exception"
with pytest.raises(ox._errors.ResponseStatusCodeError):
raise ox._errors.ResponseStatusCodeError(message)
with pytest.raises(ox._errors.CacheOnlyInterruptError):
raise ox._errors.CacheOnlyInterruptError(message)
with pytest.raises(ox._errors.GraphSimplificationError):
raise ox._errors.GraphSimplificationError(message)
with pytest.raises(ox._errors.ValidationError):
raise ox._errors.ValidationError(message)
with pytest.raises(ox._errors.InsufficientResponseError):
raise ox._errors.InsufficientResponseError(message)
with pytest.raises(ox._errors.GraphSimplificationError):
raise ox._errors.GraphSimplificationError(message)
with pytest.raises(ox._errors.ResponseStatusCodeError):
raise ox._errors.ResponseStatusCodeError(message)
@pytest.mark.xdist_group(name="group1")
def test_validating() -> None:  # noqa: PLR0915
    """Test validating graph inputs and objects."""
    # validate graph edge attribute is numeric and non-null: the lone edge
    # lacks a "length" attribute, which strict mode elevates to an error
    G = nx.MultiDiGraph()
    G.add_edge(0, 1)
    with pytest.raises(ox._errors.ValidationError):
        ox._validate._verify_numeric_edge_attribute(G, "length", strict=True)

    # features GeoDataFrame validation
    # pass in gdf with missing geometries and non-unique, non-multi index
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_features_gdf(gpd.GeoDataFrame(index=[0, 0]))

    # node/edge GeoDataFrame validation
    # pass in wrong types, bad indexes, and missing x/y columns
    gdf_nodes = pd.DataFrame(index=[0, 0])
    gdf_edges = pd.DataFrame()
    with suppress_type_checks(), pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_node_edge_gdfs(gdf_nodes, gdf_edges)

    # pass in non-Point node geometries
    gdf_nodes = gpd.GeoDataFrame(geometry=[Polygon(), Polygon()])
    gdf_edges = gpd.GeoDataFrame()
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_node_edge_gdfs(gdf_nodes, gdf_edges)

    # pass in x/y not matching geometries
    data = {"x": [0, 1], "y": [2, 3]}
    gdf_nodes = gpd.GeoDataFrame(data=data, geometry=[Point((6, 7)), Point((8, 9))])
    gdf_edges = gpd.GeoDataFrame()
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_node_edge_gdfs(gdf_nodes, gdf_edges)

    # graph validation
    # pass an empty non-MultiDiGraph
    G = nx.Graph()
    with suppress_type_checks(), pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # test missing top-level graph attribute and non-int node IDs
    G = nx.MultiDiGraph()
    del G.graph
    G.add_edge("0", "1")
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # pass an empty MultiDiGraph with an invalid CRS
    G = nx.MultiDiGraph()
    G.graph["crs"] = "epsg:999999"
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # fix the CRS and add an edge: still fails because the nodes and edges
    # lack their required attributes
    G.graph["crs"] = "epsg:4326"
    G.add_edge(0, 1)
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # add required node attributes, but with invalid types
    nx.set_node_attributes(G, values=None, name="x")
    nx.set_node_attributes(G, values=None, name="y")
    nx.set_node_attributes(G, values=None, name="street_count")
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # fix the invalid node x/y attribute types: still fails because the edges
    # lack their required attributes
    nx.set_node_attributes(G, values=0, name="x")
    nx.set_node_attributes(G, values=0, name="y")
    nx.set_node_attributes(G, values=None, name="street_count")
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # add required edge attributes, but with invalid types
    nx.set_edge_attributes(G, values=None, name="osmid")
    nx.set_edge_attributes(G, values=None, name="length")
    with pytest.raises(ox._errors.ValidationError):
        ox.convert.validate_graph(G)

    # fix the invalid edge attribute types: should finally pass validation
    nx.set_edge_attributes(G, values=[0], name="osmid")
    nx.set_edge_attributes(G, values=1.5, name="length")
    ox.convert.validate_graph(G)
@pytest.mark.xdist_group(name="group1")
@@ -230,6 +318,7 @@ def test_osm_xml() -> None:
# load and test graph_from_xml across the .osm, .bz2, and .gz files
for filepath in (path_bz2, path_gz_temp, path_osm_temp):
G = ox.graph_from_xml(filepath)
ox.convert.validate_graph(G, strict=False) # non-strict because nodes lack street_count
assert node_id in G.nodes
for neighbor_id in neighbor_ids:
@@ -268,9 +357,11 @@ def test_osm_xml() -> None:
# save a projected/consolidated graph as OSM XML
Gc = ox.simplification.consolidate_intersections(ox.projection.project_graph(G))
ox.convert.validate_graph(Gc)
nx.set_node_attributes(Gc, 0, name="uid")
ox.io.save_graph_xml(Gc, fp) # issues UserWarning
Gc = ox.graph.graph_from_xml(fp) # issues UserWarning
ox.convert.validate_graph(Gc, strict=False) # non-strict because nodes lack street_count
_ = etree.parse(fp, parser=parser)
# restore settings
@@ -530,6 +621,7 @@ def test_endpoints() -> None:
def test_save_load() -> None: # noqa: PLR0915
"""Test saving/loading graphs to/from disk."""
G = ox.graph_from_point(location_point, dist=500, network_type="drive")
ox.convert.validate_graph(G)
# save/load geopackage and convert graph to/from node/edge GeoDataFrames
ox.save_graph_geopackage(G, directed=False)
@@ -537,9 +629,11 @@ def test_save_load() -> None: # noqa: PLR0915
ox.save_graph_geopackage(G, filepath=fp, directed=True)
gdf_nodes1 = gpd.read_file(fp, layer="nodes").set_index("osmid")
gdf_edges1 = gpd.read_file(fp, layer="edges").set_index(["u", "v", "key"])
G2 = ox.graph_from_gdfs(gdf_nodes1, gdf_edges1)
G2 = ox.convert.graph_from_gdfs(gdf_nodes1, gdf_edges1)
ox.convert.validate_graph(G2, strict=False) # non-strict because osmid wasn't loaded as int
G2 = ox.graph_from_gdfs(gdf_nodes1, gdf_edges1, graph_attrs=G.graph)
gdf_nodes2, gdf_edges2 = ox.graph_to_gdfs(G2)
ox.convert.validate_graph(G2, strict=False) # non-strict because osmid wasn't loaded as int
gdf_nodes2, gdf_edges2 = ox.convert.graph_to_gdfs(G2)
_ = list(ox.utils_geo.interpolate_points(gdf_edges2["geometry"].iloc[0], 0.001))
assert set(gdf_nodes1.index) == set(gdf_nodes2.index) == set(G.nodes) == set(G2.nodes)
assert set(gdf_edges1.index) == set(gdf_edges2.index) == set(G.edges) == set(G2.edges)
@@ -586,6 +680,7 @@ def test_save_load() -> None: # noqa: PLR0915
node_dtypes={attr_name: ox.io._convert_bool_string},
edge_dtypes={attr_name: ox.io._convert_bool_string},
)
ox.convert.validate_graph(G2)
# verify everything in G is equivalent in G2
assert tuple(G.graph.keys()) == tuple(G2.graph.keys())
@@ -607,6 +702,7 @@ def test_save_load() -> None: # noqa: PLR0915
nd = {"osmid": str}
ed = {"length": str, "osmid": float}
G2 = ox.load_graphml(fp, node_dtypes=nd, edge_dtypes=ed)
ox.convert.validate_graph(G2, strict=False) # non-strict because of non-standard types
# test loading graphml from a file stream
graphml = Path("tests/input_data/short.graphml").read_text(encoding="utf-8")
@@ -625,21 +721,28 @@ def test_graph_from() -> None:
_ = ox.utils_geo.bbox_from_point(location_point, dist=1000, project_utm=True, return_crs=True)
bbox = ox.utils_geo.bbox_from_point(location_point, dist=500)
G = ox.graph_from_bbox(bbox, network_type="drive")
ox.convert.validate_graph(G)
G = ox.graph_from_bbox(bbox, network_type="drive_service", truncate_by_edge=True)
ox.convert.validate_graph(G)
# truncate graph by bounding box
bbox = ox.utils_geo.bbox_from_point(location_point, dist=400)
G = ox.truncate.truncate_graph_bbox(G, bbox)
ox.convert.validate_graph(G)
G = ox.truncate.largest_component(G, strongly=True)
ox.convert.validate_graph(G)
# graph from address
G = ox.graph_from_address(address=address, dist=500, dist_type="bbox", network_type="bike")
ox.convert.validate_graph(G)
# graph from list of places
G = ox.graph_from_place([place1], which_result=[None], network_type="all")
ox.convert.validate_graph(G)
# graph from polygon
G = ox.graph_from_polygon(polygon, network_type="walk", truncate_by_edge=True, simplify=False)
ox.convert.validate_graph(G)
G = ox.simplify_graph(
G,
node_attrs_include=["junction", "ref"],
@@ -647,6 +750,7 @@ def test_graph_from() -> None:
remove_rings=False,
track_merged=True,
)
ox.convert.validate_graph(G)
# test custom query filter
cf = (
@@ -664,10 +768,12 @@ def test_graph_from() -> None:
dist_type="bbox",
network_type="all_public",
)
ox.convert.validate_graph(G)
# test union of multiple custom filters
cf_union = ['["highway"~"tertiary"]', '["railway"~"tram"]']
G = ox.graph_from_point(location_point, dist=500, custom_filter=cf_union, retain_all=True)
ox.convert.validate_graph(G)
ox.settings.overpass_memory = 1073741824
G = ox.graph_from_point(
@@ -676,6 +782,7 @@ def test_graph_from() -> None:
dist_type="network",
network_type="all",
)
ox.convert.validate_graph(G)
@pytest.mark.xdist_group(name="group3")