## Task 06: Modifying RDF(s)
# Source file: Assignment4/Assigment4_Gonzalo_Hernandez_24C060/Task06.py
import urllib.request

# The course validator is downloaded at run time, so `validation` can only be
# imported after urlretrieve() has written the file to the working directory.
url = 'https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/refs/heads/master/Assignment4/course_materials/python/validation.py'
urllib.request.urlretrieve(url, 'validation.py')
github_storage = "https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials"

## Import RDFLib main methods
from rdflib import Graph, Namespace, Literal, XSD
from rdflib.namespace import RDF, RDFS
from validation import Report


def show(graph):
    """Print every (subject, predicate, object) triple stored in *graph*."""
    for s, p, o in graph:
        print(s, p, o)


def add_class(graph, cls, label, parent=None):
    """Declare *cls* as an rdfs:Class with an xsd:string label.

    When *parent* is given, an rdfs:subClassOf triple pointing to it is added
    as well.
    """
    graph.add((cls, RDF.type, RDFS.Class))
    if parent is not None:
        graph.add((cls, RDFS.subClassOf, parent))
    graph.add((cls, RDFS.label, Literal(label, datatype=XSD.string)))


def add_property(graph, prop, label, domain, range_):
    """Declare *prop* as an rdf:Property with domain, range and xsd:string label."""
    graph.add((prop, RDF.type, RDF.Property))
    graph.add((prop, RDFS.domain, domain))
    graph.add((prop, RDFS.range, range_))
    graph.add((prop, RDFS.label, Literal(label, datatype=XSD.string)))


g = Graph()
g.namespace_manager.bind('ns', Namespace("http://somewhere#"), override=False)
r = Report()

## Create a new class named Researcher
ns = Namespace("http://mydomain.org#")
g.add((ns.Researcher, RDF.type, RDFS.Class))
show(g)

## Task 6.0: Create new prefixes for "ontology" and "person" as shown in slide 14
## of the slide deck 01a.RDF(s)-SPARQL shown in class.
# This task is validated in the next step.
ont = Namespace("http://oeg.fi.upm.es/def/ontology#")
per = Namespace("http://oeg.fi.upm.es/def/people#")

## TASK 6.1: Reproduce the taxonomy of classes shown in slide 34 in class (all
## the classes under "Vocabulario", slide deck 01a.RDF(s)-SPARQL). Add labels
## for each of them exactly as they are in the diagram, with no language tags.
## Remember to add the correct datatype (xsd:string) when appropriate.
add_class(g, per.Person, "Person")
add_class(g, per.Professor, "Professor", parent=per.Person)
add_class(g, per.FullProfessor, "FullProfessor", parent=per.Professor)
add_class(g, per.AssociateProfessor, "AssociateProfessor", parent=per.Professor)
add_class(g, per.InterimAssociateProfessor, "InterimAssociateProfessor",
          parent=per.AssociateProfessor)
# Visualize the results
show(g)
# Validation. Do not remove
r.validate_task_06_01(g)

## TASK 6.2: Add the 3 properties shown in slide 36. Add labels for each of them
## (exactly as they are in the slide, with no language tags), and their
## corresponding domains and ranges using RDFS. Remember to add the correct
## datatype (xsd:string) when appropriate. If a property has no range, make it
## a literal (string).
add_property(g, per.hasName, "hasName", per.Person, RDFS.Literal)
add_property(g, per.hasColleague, "hasColleague", per.Person, per.Person)
add_property(g, per.hasHomePage, "hasHomePage", per.FullProfessor, RDFS.Literal)
# Visualize the results
show(g)

# Validation. Do not remove
r.validate_task_06_02(g)

## TASK 6.3: Create the individuals shown in slide 36 under "Datos". Link them
## with the same relationships shown in the diagram.
# Namespace for the individuals
data = Namespace("http://oeg.fi.upm.es/resource/person/")
# Oscar
g.add((data.Oscar, RDF.type, per.AssociateProfessor))
g.add((data.Oscar, per.hasColleague, data.Asun))
g.add((data.Oscar, per.hasName, Literal("Oscar Corcho García", datatype=XSD.string)))
g.add((data.Oscar, RDFS.label, Literal("Oscar", datatype=XSD.string)))
# Asun
g.add((data.Asun, RDF.type, per.FullProfessor))
g.add((data.Asun, per.hasColleague, data.Raul))
g.add((data.Asun, per.hasHomePage, Literal("http://www.oeg-upm.net/", datatype=XSD.string)))
g.add((data.Asun, RDFS.label, Literal("Asun", datatype=XSD.string)))
# Raul
g.add((data.Raul, RDF.type, per.InterimAssociateProfessor))
g.add((data.Raul, RDFS.label, Literal("Raul", datatype=XSD.string)))
# Visualize the results
show(g)

# Validation. The validator counts the predicates of Oscar and Asun, so it must
# run before Task 6.4 adds further properties to Oscar.
r.validate_task_06_03(g)

## TASK 6.4: Add to the individual person:Oscar the email address, given and
## family names. Use the properties already included in example 4 to describe
## Jane and John
## (https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials/rdf/example4.rdf).
## Do not import the namespaces, add them manually.
# Namespaces
foaf = Namespace("http://xmlns.com/foaf/0.1/")
vcard = Namespace("http://www.w3.org/2001/vcard-rdf/3.0/")
# New properties
g.add((vcard.Family, RDF.type, RDF.Property))
g.add((vcard.Family, RDFS.range, XSD.string))
g.add((vcard.Given, RDF.type, RDF.Property))
g.add((vcard.Given, RDFS.range, XSD.string))
# foaf:email is used as a predicate below, so it is a property (not a datatype),
# and its range must be attached to the same, correctly spelled, IRI.
g.add((foaf.email, RDF.type, RDF.Property))
g.add((foaf.email, RDFS.range, XSD.string))
# Additional information about Oscar
g.add((data.Oscar, vcard.Given, Literal("Oscar", datatype=XSD.string)))
g.add((data.Oscar, vcard.Family, Literal("Corcho García", datatype=XSD.string)))
g.add((data.Oscar, foaf.email, Literal("ocorcho@fi.upm.es", datatype=XSD.string)))
# Visualize the results
show(g)

# Validation. Do not remove
r.validate_task_06_04(g)
r.save_report("_Task_06")
## Task 07: Querying RDF(s)
# Source file: Assignment4/Assigment4_Gonzalo_Hernandez_24C060/Task07.py
import urllib.request

# The course validator is downloaded at run time, so `validation` can only be
# imported after urlretrieve() has written the file to the working directory.
url = 'https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/refs/heads/master/Assignment4/course_materials/python/validation.py'
urllib.request.urlretrieve(url, 'validation.py')
github_storage = "https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials"
from validation import Report

## First let's read the RDF file
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, RDFS
# Do not change the name of the variables
g = Graph()
g.namespace_manager.bind('ns', Namespace("http://somewhere#"), override=False)
g.parse(github_storage+"/rdf/data06.ttl", format="TTL")
report = Report()

## TASK 7.1a: For all classes, list each classURI. If the class belongs to
## another class, then list its superclass. Do the exercise in RDFLib returning
## a list of tuples (class, superclass) called "result". If a class does not
## have a superclass, then return None as the superclass.
result = []
for cls in g.subjects(RDF.type, RDFS.Class):
    # Graph.value() yields one rdfs:subClassOf object, or None when the class
    # has no superclass, which is exactly the tuple shape requested.
    result.append((cls, g.value(cls, RDFS.subClassOf)))
for row in result:
    print(row)

## Validation: Do not remove
report.validate_07_1a(result)

## TASK 7.1b: Repeat the same exercise in SPARQL, returning the variables
## ?c (class) and ?sc (superclass)
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?c ?sc WHERE {
    ?c rdf:type rdfs:Class .
    OPTIONAL { ?c rdfs:subClassOf ?sc . }
}
"""

for r in g.query(query):
    print(r.c, r.sc)

## Validation: Do not remove
report.validate_07_1b(query,g)

## TASK 7.2a: List all individuals of "Person" with RDFLib (remember the
## subclasses). Return the individual URIs in a list called "individuals"
ns = Namespace("http://oeg.fi.upm.es/def/people#")


def subclases(cl):
    """Return every direct or indirect subclass of *cl*, excluding *cl* itself.

    Graph.transitive_subjects() walks rdfs:subClassOf depth-first and remembers
    visited nodes, so a cyclic hierarchy cannot cause unbounded recursion.
    """
    return [c for c in g.transitive_subjects(RDFS.subClassOf, cl) if c != cl]


# variable to return
individuals = []
clases = subclases(ns.Person)
clases.append(ns.Person)
seen = set()
for cl in clases:
    for ind in g.subjects(RDF.type, cl):
        # An individual typed with several Person subclasses is listed once.
        if ind not in seen:
            seen.add(ind)
            individuals.append(ind)
# Visualize the results
for i in individuals:
    print(i)

# Validation. Do not remove
report.validate_07_02a(individuals)

## TASK 7.2b: Repeat the same exercise in SPARQL, returning the individual URIs
## in a variable ?ind
# The IRI of Person is the same one used through `ns.Person` in Task 7.2a.
query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?ind WHERE {
    ?c rdfs:subClassOf* <http://oeg.fi.upm.es/def/people#Person> .
    ?ind a ?c .
}
"""

# Visualize the results
for r in g.query(query):
    print(r.ind)

## Validation: Do not remove
report.validate_07_02b(g, query)

## TASK 7.3: List the name and type of those who know Rocky (in SPARQL only).
## Use name and type as variables in the query
# NOTE(review): the absolute IRIs of this query were not readable in the
# reviewed copy. The `knows` property is assumed to live in the people#
# vocabulary like hasColleague/ownsPet, and Rocky is matched by the local name
# of its IRI -- confirm both against data06.ttl.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX people: <http://oeg.fi.upm.es/def/people#>
SELECT ?name ?type WHERE {
    ?name people:knows ?known ;
          rdf:type ?type .
    FILTER(STRENDS(STR(?known), "Rocky"))
}
"""
# Visualize the results
for r in g.query(query):
    print(r.name, r.type)

## Validation: Do not remove
report.validate_07_03(g, query)

## Task 7.4: List the name of those entities who have a colleague with a dog,
## or that have a colleague who has a colleague who has a dog (in SPARQL).
## Return the results in a variable called name
query = """
PREFIX people: <http://oeg.fi.upm.es/def/people#>
SELECT ?name WHERE {
    { ?name people:hasColleague ?c1 .
      ?c1 people:ownsPet ?dog . }
    UNION
    { ?name people:hasColleague ?c2 .
      ?c2 people:hasColleague ?c3 .
      ?c3 people:ownsPet ?dog . }
}
"""

# Visualize the results
for r in g.query(query):
    print(r.name)

## Validation: Do not remove
report.validate_07_04(g,query)
report.save_report("_Task_07")
class Report:
    """Collect the pass/fail messages produced while validating Assignment 4.

    Every message is echoed to stdout when it is produced and appended to an
    internal buffer that :meth:`save_report` writes to disk.

    rdflib is imported inside the methods that query a graph, so the class
    itself needs nothing at module level and the pure string checks work
    without rdflib being importable.
    """

    # Namespaces the validator expects for classes/properties and individuals.
    _CLASS_NS = "http://oeg.fi.upm.es/def/people#"
    _INSTANCE_NS = "http://oeg.fi.upm.es/resource/person/"
    # Vocabularies whose properties Task 6.4 must attach to Oscar.
    _VCARD_NS = "http://www.w3.org/2001/vcard-rdf/3.0/"
    _FOAF_NS = "http://xmlns.com/foaf/0.1/"
    # Substrings accepted in a class URI by the Task 7.1 checks.
    _KNOWN_CLASS_NAMES = (
        "Person", "Animal", "Professor", "Student", "FullProfessor",
        "AssociateProfessor", "Instructor", "InterimAssociateProfessor",
    )

    def __init__(self):
        self.__report = ""

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _subject_with_label(g, label):
        """Return a subject labelled with the xsd:string literal *label*, or None."""
        from rdflib import Literal, XSD
        from rdflib.namespace import RDFS
        return g.value(subject=None, predicate=RDFS.label,
                       object=Literal(label, datatype=XSD.string))

    @staticmethod
    def _predicates_of(g, subject):
        """Return the predicates of every triple whose subject is *subject*.

        A ``None`` subject is a wildcard for rdflib and would match the whole
        graph, so a missing individual yields an empty list instead.
        """
        if subject is None:
            return []
        return list(g.predicates(subject=subject))

    def domain_and_range_correspond_to_input(self, g,propertyURI,correct_domain,correct_range):
        """Return True when *propertyURI* has exactly the given rdfs:domain and rdfs:range."""
        from rdflib.namespace import RDFS
        domain = g.value(subject=propertyURI, predicate=RDFS.domain)
        range_ = g.value(subject=propertyURI, predicate=RDFS.range)
        if domain is None or range_ is None:
            return False
        return domain == correct_domain and range_ == correct_range

    def does_it_have_label(self, g, entity):
        """Return True when *entity* has at least one rdfs:label in *g*."""
        from rdflib.namespace import RDFS
        return g.value(subject=entity, predicate=RDFS.label) is not None

    def namespace_is_correct_class(self, entity):
        """Return True when *entity* is an IRI inside the people# vocabulary."""
        if entity is None:
            return False
        # A namespace is a prefix: a substring test would also accept IRIs that
        # merely embed the expected namespace somewhere in the middle.
        return str(entity).startswith(self._CLASS_NS)

    def namespace_is_correct_instance(self, entity):
        """Return True when *entity* is an IRI inside the resource/person/ namespace."""
        if entity is None:
            return False
        return str(entity).startswith(self._INSTANCE_NS)

    def is_subClassOf(self, g, subClass, superClass):
        """Return True when *g* states ``subClass rdfs:subClassOf superClass``."""
        from rdflib.namespace import RDFS
        if subClass is None or superClass is None:
            # None would act as a wildcard in the membership test below.
            return False
        # Testing the exact triple also works for classes with several
        # superclasses, where Graph.value() would return an arbitrary one.
        return (subClass, RDFS.subClassOf, superClass) in g

    def __add_to_report(self, message):
        """Echo *message* and append it, newline-terminated, to the report buffer."""
        print(message)
        self.__report = self.__report + message + "\n"

    # ------------------------------------------------------------------ task 06

    def validate_task_06_01(self, g):
        """Check the class taxonomy of Task 6.1: labels, namespace and hierarchy."""
        error = False
        professorURI = self._subject_with_label(g, "Professor")
        personURI = self._subject_with_label(g, "Person")
        associateProfessorURI = self._subject_with_label(g, "AssociateProfessor")
        interimURI = self._subject_with_label(g, "InterimAssociateProfessor")
        fProfessorURI = self._subject_with_label(g, "FullProfessor")
        classes = [professorURI,personURI,associateProfessorURI,interimURI, fProfessorURI]
        # check existence: nothing else can be verified without every class
        if any(c is None for c in classes):
            self.__add_to_report("ERROR: One of the classes is missing its correct label! I cannot retrieve it")
            self.__add_to_report("ERROR IN TASK 6.1")
            return
        # check namespace
        for c in classes:
            if self.namespace_is_correct_class(c):
                print("The namespace is correct for " + str(c))
            else:
                self.__add_to_report("ERROR: The namespace is not correct for " + str(c))
                error = True
        # check class hierarchy
        expected_edges = (
            (professorURI, personURI),
            (associateProfessorURI, professorURI),
            (interimURI, associateProfessorURI),
            (fProfessorURI, professorURI),
        )
        if all(self.is_subClassOf(g, sub, sup) for sub, sup in expected_edges):
            self.__add_to_report("Hierarchy OK")
        else:
            self.__add_to_report("ERROR: Hierarchy is missing a subclassOf statement")
            error = True
        if error:
            self.__add_to_report("ERROR IN TASK 6.1")
        else:
            self.__add_to_report("TASK 6.1 OK")

    def validate_task_06_02(self, g):
        """Check the three properties of Task 6.2: labels, domains and ranges."""
        from rdflib.namespace import RDFS
        error = False
        hasColleague = self._subject_with_label(g, "hasColleague")
        hasName = self._subject_with_label(g, "hasName")
        hasHomePage = self._subject_with_label(g, "hasHomePage")
        personURI = self._subject_with_label(g, "Person")
        fullProfessorURI = self._subject_with_label(g, "FullProfessor")
        if hasColleague is None or hasName is None or hasHomePage is None:
            self.__add_to_report("ERROR: One of the properties is missing its correct label! I cannot retrieve it")
            self.__add_to_report("ERROR IN TASK 6.2")
            return
        if not self.domain_and_range_correspond_to_input(g,hasColleague,personURI,personURI):
            self.__add_to_report("ERROR: hasColleague has an incorrect domain or range")
            error = True
        if not self.domain_and_range_correspond_to_input(g,hasName,personURI,RDFS.Literal):
            self.__add_to_report("ERROR: hasName has an incorrect domain or range")
            error = True
        if not self.domain_and_range_correspond_to_input(g,hasHomePage,fullProfessorURI,RDFS.Literal):
            self.__add_to_report("ERROR: hasHomePage has an incorrect domain or range")
            error = True
        if error:
            self.__add_to_report("ERROR IN TASK 6.2")
        else:
            self.__add_to_report("TASK 6.2 OK")

    def validate_task_06_03(self, g):
        """Check the individuals of Task 6.3: labels, namespace and property count."""
        error = False
        # check all individuals can be retrieved through their label
        oscar = self._subject_with_label(g, "Oscar")
        asun = self._subject_with_label(g, "Asun")
        raul = self._subject_with_label(g, "Raul")
        if oscar is None or asun is None or raul is None:
            self.__add_to_report("ERROR: One of the individuals is missing its correct label! I cannot retrieve it")
            error = True
        # check all individuals have the correct namespace
        for name, individual in (("Oscar", oscar), ("Asun", asun), ("Raul", raul)):
            if not self.namespace_is_correct_instance(individual):
                self.__add_to_report("ERROR: " + name + " has an incorrect namespace")
                error = True
        # check all individuals have their properties
        oscar_properties = self._predicates_of(g, oscar)
        asun_properties = self._predicates_of(g, asun)
        if not oscar_properties or not asun_properties:
            self.__add_to_report("ERROR: One of the individuals has no properties")
            error = True
        elif len(oscar_properties) != 4 or len(asun_properties) != 4:
            # oscar: type, label, hasColleague, hasName.
            # asun: type, label, hasHomePage, hasColleague
            self.__add_to_report("ERROR: One of the individuals has the wrong number of properties")
            error = True
        if error:
            self.__add_to_report("ERROR IN TASK 6.3")
        else:
            self.__add_to_report("TASK 6.3 OK")

    def validate_task_06_04(self, g):
        """Check that Oscar carries vcard:Given, vcard:Family and foaf:email."""
        from rdflib import Namespace
        vcard = Namespace(self._VCARD_NS)
        foaf = Namespace(self._FOAF_NS)
        error = False
        target_properties = [vcard.Given, vcard.Family, foaf.email]
        # retrieve all predicates used on Oscar (none when Oscar is missing)
        oscar = self._subject_with_label(g, "Oscar")
        oscar_properties = self._predicates_of(g, oscar)
        if not oscar_properties:
            self.__add_to_report("ERROR: Oscar has no properties")
            error = True
        # do they have the correct ns?
        for prop in target_properties:
            if prop not in oscar_properties:
                self.__add_to_report("ERROR: One of the properties from Oscar has no correct namespace or does not exist. Please double check")
                error = True
        if error:
            self.__add_to_report("ERROR IN TASK 6.4")
        else:
            self.__add_to_report("TASK 6.4 OK")

    def save_report(self, task):
        """Write the accumulated report to ``report_result<task>.txt`` (UTF-8)."""
        report_name = "report_result" + task + ".txt"
        with open(report_name, "w", encoding="utf-8") as f:
            f.write(self.__report)

    # ------------------------------------------------------------------ task 07

    def validate_07_01(self, result, task):
        """Check a list of (class, superclass) pairs and report under *task*."""
        error = False
        if len(result) != 7:
            self.__add_to_report("ERROR: The number of classes returned is not correct")
            error = True
        for c,sc in result:
            class_uri = str(c)
            # Anything except Person and Animal must have a superclass
            if sc is None and "Person" not in class_uri and "Animal" not in class_uri:
                self.__add_to_report("The class "+class_uri+" has no superclass")
                error = True
            if not any(name in class_uri for name in self._KNOWN_CLASS_NAMES):
                self.__add_to_report("ERROR: incorrect class retrieved")
                error = True
        if not error:
            self.__add_to_report(task+" OK")

    def validate_07_1a(self, result):
        """Validate the RDFLib answer of Task 7.1a."""
        self.validate_07_01(result, "TASK 7.1a")

    def validate_07_1b(self, query, g):
        """Run *query* on *g* and validate its ?c/?sc rows for Task 7.1b."""
        # The query is executed once; each row is reduced to a plain tuple.
        pairs = [(row.c, row.sc) for row in g.query(query)]
        self.validate_07_01(pairs, "TASK 7.1b")

    def validate_07_02(self,result, task):
        """Check a list of Person individuals and report under *task*."""
        error = False
        if len(result) != 3:
            self.__add_to_report("ERROR: The number of individuals returned is not correct")
            error = True
        for i in result:
            individual = str(i)
            if not any(name in individual for name in ("Asun", "Raul", "Oscar")):
                self.__add_to_report("ERROR: The individual "+individual+" is not correct")
                error = True
        if not error:
            self.__add_to_report(task+" OK")

    def validate_07_02a(self, individuals):
        """Validate the RDFLib answer of Task 7.2a."""
        self.validate_07_02(individuals, "TASK 7.2a")

    def validate_07_02b(self, g, query):
        """Run *query* on *g* and validate its ?ind bindings for Task 7.2b."""
        found = []
        for row in g.query(query):
            if row.ind is None:
                self.__add_to_report("ERROR: Variable used to retrieve the individuals is not correct!")
            else:
                found.append(row.ind)
        self.validate_07_02(found, "TASK 7.2b")

    def _validate_names(self, g, query, expected_names, task):
        """Check that *query* returns three rows whose ?name matches *expected_names*."""
        error = False
        # Materialise once so that counting and inspecting use the same rows.
        rows = list(g.query(query))
        if len(rows) != 3:
            self.__add_to_report("ERROR: The number of individuals returned is not correct")
            error = True
        for row in rows:
            # str() keeps an unbound ?name from raising inside the `in` test.
            name = str(row.name)
            if not any(expected in name for expected in expected_names):
                self.__add_to_report("ERROR: An individual returned is not correct")
                error = True
        if not error:
            self.__add_to_report(task + " OK")

    def validate_07_03(self, g, query):
        """Validate Task 7.3: those who know Rocky."""
        self._validate_names(g, query, ("Asun", "Raul", "Fantasma"), "TASK 7.3")

    def validate_07_04(self, g, query):
        """Validate Task 7.4: those with a (transitive) colleague owning a dog."""
        self._validate_names(g, query, ("Asun", "Raul", "Oscar"), "TASK 7.4")