Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "welearn-database"
version = "1.5.0"
version = "1.5.1"
description = "All stuff related to relationnal database from the WeLearn project"
authors = [
{name = "Théo",email = "theo.nardin@cri-paris.org"}
Expand Down
42 changes: 26 additions & 16 deletions tests/test_document_related.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
from dataclasses import dataclass
from unittest import TestCase
from zlib import adler32

Expand All @@ -20,6 +21,12 @@
from welearn_database.exceptions import ContentIsTooShort, InvalidDOI, InvalidURLScheme


@dataclass
class AuthorDetails:
    """Minimal dataclass used by the tests as a value stored under ``details``."""

    # Display name of the author.
    name: str
    # Free-form extra information; the tests pass an empty string.
    misc: str


class TestWeLearnDocument(TestCase):
def test_validate_url(self):
test_doc = WeLearnDocument(
Expand All @@ -29,7 +36,7 @@ def test_validate_url(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

self.assertEqual(test_doc.url, "https://example.com/test-document")
Expand All @@ -43,7 +50,7 @@ def test_validate_wrong_scheme_url(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

def test_validate_wrong_url(self):
Expand All @@ -55,7 +62,7 @@ def test_validate_wrong_url(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

def test_validate_full_content(self):
Expand All @@ -66,7 +73,7 @@ def test_validate_full_content(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

self.assertEqual(
Expand All @@ -83,7 +90,7 @@ def test_validate_too_short_full_content(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

def test_full_content(self):
Expand All @@ -94,7 +101,7 @@ def test_full_content(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

self.assertEqual(
Expand All @@ -110,7 +117,10 @@ def test_doi(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author", "doi": "10.1000/xyz123"},
details={
"author": AuthorDetails(name="Test Author", misc=""),
"doi": "10.1000/xyz123",
},
doi="10.1000/xyz123",
)

Expand All @@ -126,7 +136,7 @@ def test_invalid_doi(self):
lang="en",
corpus="Test Corpus",
details={
"author": "Test Author",
"author": AuthorDetails(name="Test Author", misc=""),
"doi": "11.1590/s0100-879x2002000500007",
},
doi="11.1590/s0100-879x2002000500007",
Expand All @@ -142,7 +152,7 @@ def test_unclean_doi(self):
lang="en",
corpus="Test Corpus",
details={
"author": "Test Author",
"author": AuthorDetails(name="Test Author", misc=""),
"doi": "https://doi.org/10.1000/xyz123",
},
doi="https://doi.org/10.1000/xyz123",
Expand All @@ -156,7 +166,7 @@ def test_description(self):
description="<p>A short description &nbsp of the test document.</p>",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

self.assertEqual(
Expand Down Expand Up @@ -202,7 +212,7 @@ def test_external_id(self):
full_content="This is a test document, used for unit testing, please ignore. Thank you!",
description="A short description of the test document.",
lang="en",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

test_session.add(test_doc)
Expand Down Expand Up @@ -232,7 +242,7 @@ def test_trace(self):
description="A short description of the test document.",
lang="en",
corpus="Test Corpus",
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)

self.assertEqual(test_doc.trace, expected_trace)
Expand Down Expand Up @@ -275,7 +285,7 @@ def test_trace_in_db(self):
description="A short description of the test document.",
lang="en",
corpus_id=test_corpus.id,
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)
test_session.add(test_doc)
test_session.commit()
Expand Down Expand Up @@ -322,7 +332,7 @@ def test_none_trace_in_db(self):
description="A short description of the test document.",
lang="en",
corpus_id=test_corpus.id,
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)
test_session.add(test_doc)
test_session.commit()
Expand Down Expand Up @@ -372,7 +382,7 @@ def test_view_qty_document(self):
description="A short description of the test document.",
lang="en",
corpus_id=test_corpus.id,
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)
test_session.add(test_doc)
test_process_state = ProcessState(
Expand Down Expand Up @@ -441,7 +451,7 @@ def test_error_data_quality(self):
description="A short description of the test document.",
lang="en",
corpus_id=test_corpus.id,
details={"author": "Test Author"},
details={"author": AuthorDetails(name="Test Author", misc="")},
)
test_session.add(test_doc)
test_process_state = ProcessState(
Expand Down
30 changes: 30 additions & 0 deletions welearn_database/data/details_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataclasses import asdict, is_dataclass

from sqlalchemy import types


class DetailsDict(types.TypeDecorator):
"""
Convert dataclass instances into dictionaries for JSON storage.
"""
Comment thread
lpi-tn marked this conversation as resolved.

impl = types.JSON
Comment thread
lpi-tn marked this conversation as resolved.
cache_ok = True

@staticmethod
def _is_dataclass_instance(obj):
return is_dataclass(obj) and not isinstance(obj, type)

def _inner_serialize_dataclass(self, value):
match value:
case list():
return [self._inner_serialize_dataclass(item) for item in value]
case dict():
return {k: self._inner_serialize_dataclass(v) for k, v in value.items()}
if self._is_dataclass_instance(value):
return asdict(value)
return value

def process_bind_param(self, value, dialect):
# Serialize recursively without mutating the original object stored on the ORM instance.
return self._inner_serialize_dataclass(value)
3 changes: 2 additions & 1 deletion welearn_database/data/models/document_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sqlalchemy.dialects.postgresql import ARRAY, ENUM, TIMESTAMP
from sqlalchemy.orm import Mapped, mapped_column, relationship, validates

from welearn_database.data.details_dict import DetailsDict
from welearn_database.data.enumeration import (
ContextType,
Counter,
Expand Down Expand Up @@ -83,7 +84,7 @@ class WeLearnDocument(Base):
lang: Mapped[str | None]
description: Mapped[str | None]
full_content: Mapped[str | None]
details: Mapped[dict[str, Any] | None]
details: Mapped[dict[str, Any] | None] = mapped_column(DetailsDict)
trace: Mapped[int | None] = mapped_column(types.BIGINT)
corpus_id: Mapped[UUID] = mapped_column(
types.Uuid,
Expand Down
Loading