caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* ocaml for the Semantic Web
@ 2009-08-18 10:27 tumenjargal tsagaan
  2009-08-19  7:20 ` [Caml-list] " Sebastien Ferre
  0 siblings, 1 reply; 2+ messages in thread
From: tumenjargal tsagaan @ 2009-08-18 10:27 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 193 bytes --]

Hi,

(1) Are there any specialized APIs for processing RDF and OWL files?
(2) Is there an API in OCaml similar to the XML parsers from the Java world?

Thank you.

Tumee.




      

[-- Attachment #2: Type: text/html, Size: 328 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [Caml-list] ocaml for the Semantic Web
  2009-08-18 10:27 ocaml for the Semantic Web tumenjargal tsagaan
@ 2009-08-19  7:20 ` Sebastien Ferre
  0 siblings, 0 replies; 2+ messages in thread
From: Sebastien Ferre @ 2009-08-19  7:20 UTC (permalink / raw)
  To: tumenjargal tsagaan, caml-list

[-- Attachment #1: Type: text/plain, Size: 923 bytes --]

Hi,

I am also interested in processing semantic web languages in OCaml,
and I haven't found anything yet.
Some months ago, I wrote a parser for RDF files (using Xml-light).
This cannot be considered as an "API" for RDF, but the hard work of
analysing the RDF-XML is done (source file as attachment).

Sébastien

tumenjargal tsagaan wrote:
> Hi,
> 
> (1) Are there any specialized APIs for processing RDF and OWL files?
> (2) Is there an API in OCaml similar to the XML parsers from the Java world?
> 
> Thank you.
> 
> Tumee.
> 
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs

[-- Attachment #2: rdf.ml --]
[-- Type: text/plain, Size: 6105 bytes --]

(*
   Extracting RDF statements from the XML structure
   generated by the library 'xml-light'.

   Author: Sébastien Ferré <ferre@irisa.fr>
   Creation: 11/02/2009

*)

(* String aliases used by the RDF model. *)
type uri = string   (* a URI reference, stored as a raw string *)
type id = string    (* a blank-node identifier *)
type lang = string  (* a language tag *)

(* Kind of a literal: plain text carrying a language tag, or a value typed
   by a datatype URI. *)
type datatype =
  | Plain of lang
  | Typed of uri

(* A node of the RDF graph: a resource named by a URI, an XML literal kept
   as an xml-light tree, a string literal with its kind, or a blank node. *)
type thing =
  | URI of uri
  | XMLLiteral of Xml.xml
  | Literal of string * datatype
  | Blank of id

(* Properties are identified by their URI. *)
type property = uri

(* [Node (s, arcs)] is the thing [s] together with its outgoing arcs.
   Each arc is [(p, (r, o))]: the property [p], an optional URI [r]
   (filled by the parser from an rdf:ID on the property element), and the
   object subtree [o]. *)
type tree = Node of thing * (property * (uri option * tree)) list

(* Result of parsing a document: the namespace declarations as
   (prefix, uri) pairs and the list of top-level trees. *)
type rdf = {
  xmlns : (string * string) list;
  trees : tree list;
}

(* accessors *)

(* [subject t] is the thing stored at the root of [t]. *)
let subject = function Node (root, _) -> root

(* [properties t] is the list of arcs leaving the root of [t]. *)
let properties = function Node (_, arcs) -> arcs

(* [all_objects t p] is the list of object subtrees reached from the root
   of [t] through property [p], in the order the arcs are stored. *)
let all_objects (Node (_, ps)) p =
  let matching = List.filter (fun (p', _) -> p' = p) ps in
  List.map (fun (_, (_, o')) -> o') matching

(* [statement t p o] looks for the first arc of [t] whose property is [p]
   and whose object subtree is rooted at [o], and returns the optional URI
   attached to that arc.
   Raises [Not_found] (from [List.find]) when no arc matches. *)
let statement (Node (_, ps)) p o =
  let is_match (p', (_, Node (o', _))) = (p' = p) && (o' = o) in
  let (_, (reification, _)) = List.find is_match ps in
  reification

(* RDF vocabulary *)

(* namespace *)
(* Expanded namespace URI of the RDF vocabulary. The constants below are
   written with the conventional "rdf:" prefix, not with this URI. *)
let namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
(* classes *)
let _XMLLiteral = "rdf:XMLLiteral"
let _Property = "rdf:Property"
let _Statement = "rdf:Statement"
let _Bag = "rdf:Bag"
(* The ordered container class of the RDF vocabulary. *)
let _Seq = "rdf:Seq"
(* NOTE(review): the RDF vocabulary defines rdf:Seq, not rdf:Set; this
   constant looks like a typo for [_Seq]. It is kept unchanged so that
   existing code referring to [_Set] still compiles; prefer [_Seq]. *)
let _Set = "rdf:Set"
let _Alt = "rdf:Alt"
let _List = "rdf:List"
(* properties *)
let _type = "rdf:type"
let _first = "rdf:first"
let _rest = "rdf:rest"
let _value = "rdf:value"
let _subject = "rdf:subject"
let _object = "rdf:object"
let _predicate = "rdf:predicate"
(* [_n n] is the container membership property rdf:_n. *)
let _n n = "rdf:_" ^ string_of_int n
(* resources *)
let _nil = "rdf:nil"

(* parsing *)

(* Context inherited while walking the XML tree: the value of xml:base and
   of xml:lang currently in scope. *)
type parse_ctx = {
  base : string;
  lang : string;
}

(* Raised by the parsing functions when an element or attribute is not of
   the kind being parsed. Note that from this point on the constructor
   [Failure] denotes this argument-less exception and hides the standard
   [Failure of string]. *)
exception Failure

(* Raised when the content of a property element has an unexpected shape. *)
exception Error

(* [parse_list p l] applies the parser [p] to every element of [l] and
   returns the successful results in input order. An element on which [p]
   raises any exception is silently dropped. *)
let parse_list p l =
  let rec collect acc = function
    | [] -> List.rev acc
    | x :: rest ->
        let acc' = try p x :: acc with _ -> acc in
        collect acc' rest in
  collect [] l

(* Context used at the root of a document: empty base and empty language. *)
let default_ctx = { base = ""; lang = "" }

(* [get_ctx previous_ctx e] is the context in force inside element [e]:
   the xml:base and xml:lang attributes of [e] when present, otherwise the
   corresponding values of [previous_ctx]. Any exception raised by the
   attribute lookup is treated as "attribute absent". *)
let get_ctx previous_ctx e =
  let attrib_or name fallback = try Xml.attrib e name with _ -> fallback in
  let base = attrib_or "xml:base" previous_ctx.base in
  let lang = attrib_or "xml:lang" previous_ctx.lang in
  { base = base; lang = lang }

(* [resolve ctx rel] returns [rel] unchanged when it contains a ':' and
   otherwise prepends the base of [ctx] to it. *)
let resolve ctx rel =
  match String.contains rel ':' with
  | true -> rel
  | false -> ctx.base ^ rel

(* [resolve_tag ctx tag] returns [tag] unchanged when it contains a ':'
   and otherwise builds base, "#" and [tag] joined together. *)
let resolve_tag ctx tag =
  match String.contains tag ':' with
  | true -> tag
  | false -> String.concat "#" [ctx.base; tag]

(* Predicates classifying qualified names of the RDF/XML syntax. All of
   them work on the prefixed form of the name (for instance "rdf:about"). *)

(* True for the core syntax terms. *)
let isCoreSyntaxTerm = function
  | "rdf:RDF" | "rdf:ID" | "rdf:about" | "rdf:parseType"
  | "rdf:resource" | "rdf:nodeID" | "rdf:datatype" -> true
  | _ -> false

(* True for the core syntax terms plus rdf:Description and rdf:li. *)
let isSyntaxTerm x =
  match x with
  | "rdf:Description" | "rdf:li" -> true
  | _ -> isCoreSyntaxTerm x

(* True for the three terms listed here as old syntax. *)
let isOldTerm = function
  | "rdf:aboutEach" | "rdf:aboutEachPrefix" | "rdf:bagID" -> true
  | _ -> false

(* A name is accepted as a node element unless it is a core syntax term,
   rdf:li or an old term. *)
let isNodeElementURI x =
  not (isCoreSyntaxTerm x) && x <> "rdf:li" && not (isOldTerm x)

(* A name is accepted as a property element unless it is a core syntax
   term, rdf:Description or an old term. *)
let isPropertyElementURI x =
  not (isCoreSyntaxTerm x) && x <> "rdf:Description" && not (isOldTerm x)

(* A name is accepted as a property attribute unless it is a core syntax
   term, rdf:Description, rdf:li or an old term. *)
let isPropertyAttributeURI x =
  not (isCoreSyntaxTerm x)
  && x <> "rdf:Description"
  && x <> "rdf:li"
  && not (isOldTerm x)

(* [parse_RDF e] parses an rdf:RDF element.
   - [xmlns] collects the namespace declarations found among the attributes
     of [e]: an attribute "xmlns:p" yields the pair (p, value) and a bare
     "xmlns" yields ("", value); attribute order is preserved.
   - [trees] holds the children of [e] parsed as node elements; children
     that fail to parse are dropped by [parse_list].
   Raises [Failure] when the tag of [e] is not "rdf:RDF". *)
let rec parse_RDF e =
  if Xml.tag e <> "rdf:RDF" then raise Failure
  else begin
    (* Turn one attribute into a namespace binding, if it declares one. *)
    let namespace_binding (attr, value) =
      let len = String.length attr in
      let colon = try String.index attr ':' with Not_found -> len in
      if String.sub attr 0 colon <> "xmlns" then None
      else if colon = len then Some ("", value)
      else Some (String.sub attr (colon + 1) (len - colon - 1), value) in
    let xmlns =
      List.fold_right
        (fun attribute acc ->
          match namespace_binding attribute with
          | Some binding -> binding :: acc
          | None -> acc)
        (Xml.attribs e) [] in
    let ctx = get_ctx default_ctx e in
    let trees = parse_list (parse_nodeElement ctx) (Xml.children e) in
    { xmlns = xmlns; trees = trees }
  end

(* [parse_nodeElement previous_ctx e] parses [e] as a node element and
   returns the tree rooted at the node it describes.
   The subject is taken, in this order of preference, from rdf:ID (resolved
   as a fragment of the base), rdf:nodeID (a blank node with that id),
   rdf:about (resolved against the base), and otherwise is [Blank ""].
   The arcs are, in order: an rdf:type arc built from the tag unless the
   tag is rdf:Description, then the property attributes, then the property
   child elements. Attributes and children that fail to parse are dropped.
   Raises [Failure] when the tag is not acceptable for a node element. *)
and parse_nodeElement previous_ctx e =
  let tag = Xml.tag e in
  let ctx = get_ctx previous_ctx e in
  if not (isNodeElementURI tag) then raise Failure
  else begin
    (* Attribute lookup where any failure means "absent". *)
    let attr name = try Some (Xml.attrib e name) with _ -> None in
    let subject =
      match attr "rdf:ID" with
      | Some ident -> URI (resolve ctx ("#" ^ ident))
      | None ->
          (match attr "rdf:nodeID" with
           | Some node_id -> Blank node_id
           | None ->
               (match attr "rdf:about" with
                | Some about -> URI (resolve ctx about)
                | None -> Blank "")) in
    let type_arcs =
      if tag = "rdf:Description" then []
      else [ (_type, (None, Node (URI (resolve_tag ctx tag), []))) ] in
    let attr_arcs = parse_list (parse_propertyAttr ctx) (Xml.attribs e) in
    (* A single counter is shared by all the children of this node. *)
    let child_arcs =
      parse_list (parse_propertyElt ctx (ref 0)) (Xml.children e) in
    Node (subject, type_arcs @ attr_arcs @ child_arcs)
  end

(* [parse_propertyAttr ctx (a, v)] turns the attribute named [a] with value
   [v] into an arc:
   - rdf:type gives an arc to the resource obtained by resolving [v];
   - any other accepted attribute gives an arc to the plain literal [v]
     tagged with the language of [ctx].
   Raises [Failure] when [a] is not a property attribute. This includes
   the names rejected by [isPropertyAttributeURI] and every name starting
   with "xml" in any letter case (xml:lang, xml:base, xmlns, xmlns:p, ...):
   RDF/XML removes those reserved XML names from the attribute set, so they
   must not show up as RDF properties. *)
and parse_propertyAttr ctx (a,v) =
  let is_reserved_xml_name =
    String.length a >= 3
    && (match a.[0], a.[1], a.[2] with
        | ('x' | 'X'), ('m' | 'M'), ('l' | 'L') -> true
        | _ -> false) in
  if is_reserved_xml_name || not (isPropertyAttributeURI a) then raise Failure
  else if a = _type then (a, (None, Node (URI (resolve ctx v), [])))
  else (a, (None, Node (Literal (v, Plain ctx.lang), [])))

(* [parse_propertyElt previous_ctx cpt e] parses [e] as a property element
   and returns the arc [(property, (reified, object_tree))].

   [cpt] is the rdf:li counter shared by the sibling property elements of
   one node. Each rdf:li element increments it and is renamed to the
   membership property rdf:_N built by [_n]; other property elements leave
   the counter untouched, so that rdf:li numbering is not disturbed by
   interleaved properties.

   [reified] is [Some uri] when the element carries an rdf:ID, the uri being
   that id resolved as a fragment of the base.

   The object depends on rdf:parseType and on the children:
   - parseType "Resource": a blank node whose arcs are the children;
   - parseType "Collection": an rdf:first / rdf:rest list of the children
     parsed as node elements, terminated by rdf:nil;
   - any other parseType: the single child as an XML literal;
   - no parseType, one element child: that child parsed as a node element;
   - no parseType, one text child: a literal, typed by rdf:datatype when
     present and otherwise plain with the language in scope;
   - no parseType, no child: the empty plain literal when there is no
     attribute or only rdf:ID, otherwise a resource taken from rdf:resource
     or rdf:nodeID (or [Blank ""]) carrying the property attributes.

   Any exception raised while handling rdf:parseType (including the lookup
   of the attribute itself) makes the function fall back to the
   "no parseType" cases; in particular a literal parseType whose content
   is not exactly one child is handled by those cases.

   Raises [Failure] when the tag is not acceptable for a property element
   and [Error] when the children have an unsupported shape. *)
and parse_propertyElt previous_ctx cpt e =
  let tag =
    match Xml.tag e with
    | "rdf:li" ->
        (* list expansion: only rdf:li consumes a number *)
        incr cpt;
        _n !cpt
    | s -> s in
  let ctx = get_ctx previous_ctx e in
  if not (isPropertyElementURI tag) then raise Failure
  else begin
    let reified =
      try Some (resolve ctx ("#" ^ Xml.attrib e "rdf:ID")) with _ -> None in
    let children = Xml.children e in
    try
      match Xml.attrib e "rdf:parseType" with
      | "Resource" -> (* parseTypeResourcePropertyElt *)
          let properties =
            parse_list (parse_propertyElt ctx (ref 0)) children in
          (tag, (reified, Node (Blank "", properties)))
      | "Collection" -> (* parseTypeCollectionPropertyElt *)
          let t =
            List.fold_right
              (fun n res ->
                Node (Blank "",
                      [ (_first, (None, n));
                        (_rest, (None, res)) ]))
              (parse_list (parse_nodeElement ctx) children)
              (Node (URI _nil, [])) in
          (tag, (reified, t))
      | _ ->
          (* parseTypeLiteralPropertyElt and parseTypeOtherPropertyElt *)
          let xml = match children with [n] -> n | _ -> raise Error in
          (tag, (reified, Node (XMLLiteral xml, [])))
    with _ ->
      match children with
      | [Xml.Element _ as n] -> (* resourcePropertyElt *)
          let t = parse_nodeElement ctx n in
          (tag, (reified, t))
      | [Xml.PCData s] -> (* literalPropertyElt *)
          let d =
            try Typed (Xml.attrib e "rdf:datatype") with _ ->
              Plain ctx.lang in
          (tag, (reified, Node (Literal (s, d), [])))
      | [] -> (* emptyPropertyElt *)
          let attribs = Xml.attribs e in
          ( match attribs with
          | []
          | ["rdf:ID", _] ->
              (tag, (reified, Node (Literal ("", Plain ctx.lang), [])))
          | _ ->
              let obj =
                try URI (resolve ctx (Xml.attrib e "rdf:resource")) with _ ->
                try Blank (Xml.attrib e "rdf:nodeID") with _ ->
                  Blank "" in
              let properties = parse_list (parse_propertyAttr ctx) attribs in
              (tag, (reified, Node (obj, properties)))
          )
      | _ -> raise Error
  end

(* [from_xml xml] parses [xml] as a whole rdf:RDF document. When that fails
   for any reason, [xml] is parsed instead as a single node element with
   the default context, and the result has no namespace declarations.
   An exception raised by that second attempt is propagated. *)
let from_xml xml =
  try parse_RDF xml
  with _ ->
    let root = parse_nodeElement default_ctx xml in
    { xmlns = []; trees = [root] }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2009-08-19  7:20 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-18 10:27 ocaml for the Semantic Web tumenjargal tsagaan
2009-08-19  7:20 ` [Caml-list] " Sebastien Ferre

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).