Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ lib/PyLD.egg-info
profiler
tests/test_caching.py
tests/data/test_caching.json

# JetBrains IDEs
.idea

# pyenv
.python-version
40 changes: 34 additions & 6 deletions lib/pyld/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -3661,12 +3661,33 @@ def _object_to_rdf(self, item, issuer, triples, rdfDirection):
elif _is_bool(value):
object['value'] = 'true' if value else 'false'
object['datatype'] = datatype or XSD_BOOLEAN
elif _is_double(value) or datatype == XSD_DOUBLE:
# canonical double representation
object['value'] = re.sub(
r'(\d)0*E\+?0*(\d)', r'\1E\2',
('%1.15E' % value))
object['datatype'] = datatype or XSD_DOUBLE

elif _is_double(value):
return {
**object,
'value': _canonicalize_double(value),
'datatype': datatype or XSD_DOUBLE,
}

elif datatype == XSD_DOUBLE:
# Since the previous branch did not activate, we know that `value` is not a float number.
try:
float_value = float(value)
except (ValueError, TypeError):
# If `value` is not convertible to float, we will return it as-is.
return {
**object,
'value': value,
'datatype': XSD_DOUBLE,
}
else:
# We have a float, and canonicalization may proceed.
return {
**object,
'value': _canonicalize_double(float_value),
'datatype': XSD_DOUBLE,
}

elif _is_integer(value):
object['value'] = str(value)
object['datatype'] = datatype or XSD_INTEGER
Expand Down Expand Up @@ -6390,6 +6411,13 @@ def _is_double(v):
return not isinstance(v, Integral) and isinstance(v, Real)


def _canonicalize_double(value: float) -> str:
"""Convert a float value to canonical lexical form of `xsd:double`."""
return re.sub(
r'(\d)0*E\+?0*(\d)', r'\1E\2',
('%1.15E' % value))


def _is_numeric(v):
"""
Returns True if the given value is numeric.
Expand Down
69 changes: 69 additions & 0 deletions tests/test_double_to_rdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Tests for to_rdf functionality, specifically focusing on double/float handling bugs.
"""

import json
import sys
import os
import unittest

# Add the lib directory to the path so we can import pyld
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib'))

import pyld.jsonld


class TestDoubleToRdf(unittest.TestCase):
    """Regression tests for how ``to_rdf`` serializes double/float values."""

    def test_offline_pyld_bug_reproduction(self):
        """A string ``@value`` typed ``xsd:double`` converts to RDF offline."""
        subject_iri = "http://www.wikidata.org/entity/Q399"
        longitude_iri = "http://www.w3.org/2003/01/geo/wgs84_pos#longitude"

        # Offline copy of the structure captured from Wikidata Q399. The
        # number arrives as the string "45" although it is typed as
        # xsd:double, which is the input that used to break the conversion.
        longitude_node = {
            "@type": "xsd:double",
            "@value": "45",
        }
        document = {
            "@context": {
                "xsd": "http://www.w3.org/2001/XMLSchema#",
                "geoLongitude": longitude_iri,
            },
            "@graph": [
                {
                    "@id": subject_iri,
                    "geoLongitude": longitude_node,
                },
            ],
        }

        # The only triple expected in the default graph: the literal keeps
        # its xsd:double datatype and is written in canonical form.
        expected_triple = {
            "subject": {"type": "IRI", "value": subject_iri},
            "predicate": {"type": "IRI", "value": longitude_iri},
            "object": {
                "type": "literal",
                "value": "4.5E1",
                "datatype": "http://www.w3.org/2001/XMLSchema#double",
            },
        }
        expected_dataset = {"@default": [expected_triple]}

        actual_dataset = pyld.jsonld.to_rdf(document)

        self.assertEqual(actual_dataset, expected_dataset)


# Allow running this test module directly as a script, not only through a
# test runner.
if __name__ == '__main__':
    unittest.main()