LibDocument

image-alt

Document and Trie Processing.


Version: 0.1.1

Functions and data structures for creating annotated documents that are treelike (cross references permitted). Serialization functions for HTML, JSON, and Python objects. More…

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include <Python.h>
#include "trie.h"

Go to the source code of this file.

Classes

struct  ldoc_raw_t
 Raw data structure (payload pointer and data length). More...
 
union  ldoc_anno_pld_t
 Payload data types for annotations. More...
 
struct  ldoc_doc_anno_t
 Document annotations (payloads of datum and annotation assignments). More...
 
struct  ldoc_py_t
 Python representation of ldoc_doc_anno_t. More...
 
union  ldoc_pld_t
 Payload data types for datums. More...
 
struct  ldoc_ser_t
 Serialization data structure. More...
 
struct  ldoc_nde_t
 A document node. More...
 
struct  ldoc_nde_qstk_t
 Node quick-stack for efficiently handling node allocations/deallocations. More...
 
struct  ldoc_ent_t
 Document entity: a concrete piece of data that is part of a document. More...
 
struct  ldoc_doc_t
 Document structure. More...
 
struct  ldoc_pos_t
 A cursor position within a node. More...
 
union  ldoc_info_t
 Information carrying types in a document: nodes and entities. More...
 
struct  ldoc_res_t
 Entity or node returned by a search for a specific annotation. More...
 
struct  ldoc_coord_t
 Coordinate within a document. More...
 
struct  ldoc_vis_nde_t
 A node visitor object calls the given visitor functions based on node type. More...
 
struct  ldoc_vis_nde_ord_t
 Ordered visitor structure for setting up serialization, tearing it down, and pre-, in-, and postfix visits. More...
 
struct  ldoc_vis_ent_t
 An entity visitor object calls the given visitor functions based on entity type. More...
 

Macros

#define LDOC_QSTK_FULL(qstk)   (qstk->wptr + 1) % qstk->max == qstk->rptr
 
#define LDOC_QSTK_NDE_ADDR(qstk, n)   sizeof(ldoc_nde_t*) * (n)
 

Typedefs

typedef struct ldoc_raw_t ldoc_raw_t
 Raw data structure (payload pointer and data length). More…
 
typedef struct ldoc_doc_anno_t ldoc_doc_anno_t
 Document annotations (payloads of datum and annotation assignments). More…
 
typedef struct ldoc_py_t ldoc_py_t
 Python representation of ldoc_doc_anno_t. More…
 
typedef struct ldoc_ser_t ldoc_ser_t
 Serialization data structure. More…
 
typedef struct ldoc_nde_t ldoc_nde_t
 A document node. More…
 
typedef struct ldoc_nde_qstk_t ldoc_nde_qstk_t
 Node quick-stack for efficiently handling node allocations/deallocations. More…
 
typedef struct ldoc_ent_t ldoc_ent_t
 Document entity: a concrete piece of data that is part of a document. More…
 
typedef struct ldoc_doc_t ldoc_doc_t
 Document structure. More…
 
typedef struct ldoc_pos_t ldoc_pos_t
 A cursor position within a node. More…
 
typedef struct ldoc_res_t ldoc_res_t
 Entity or node returned by a search for a specific annotation. More…
 
typedef struct ldoc_coord_t ldoc_coord_t
 Coordinate within a document. More…
 
typedef struct ldoc_vis_nde_t ldoc_vis_nde_t
 A node visitor object calls the given visitor functions based on node type. More…
 
typedef struct ldoc_vis_nde_ord_t ldoc_vis_nde_ord_t
 Ordered visitor structure for setting up serialization, tearing it down, and pre-, in-, and postfix visits. More…
 
typedef struct ldoc_vis_ent_t ldoc_vis_ent_t
 An entity visitor object calls the given visitor functions based on entity type. More…
 

Enumerations

enum ldoc_struct_t {
  LDOC_NDE_RT, LDOC_NDE_UA, LDOC_NDE_H1, LDOC_NDE_H2,
  LDOC_NDE_H3, LDOC_NDE_H4, LDOC_NDE_H5, LDOC_NDE_H6,
  LDOC_NDE_PAR, LDOC_NDE_UL, LDOC_NDE_OL, LDOC_NDE_ANC,
  LDOC_NDE_NN, LDOC_NDE_OO
}
 Node types denoting the document content hierarchy and atomic concepts. More...
 
enum ldoc_content_t {
  LDOC_ENT_TXT, LDOC_ENT_EM1, LDOC_ENT_EM2, LDOC_ENT_URI,
  LDOC_ENT_REF, LDOC_ENT_NUM, LDOC_ENT_BL, LDOC_ENT_BR,
  LDOC_ENT_NR, LDOC_ENT_OR
}
 Entity types denoting sequentially ordered, but essentially "flat", node contents. More...
 
enum ldoc_serpld_t {
  LDOC_SER_CSTR, LDOC_SER_PY_INT, LDOC_SER_PY_BL, LDOC_SER_PY_FLT,
  LDOC_SER_PY_STR, LDOC_SER_PY_LST, LDOC_SER_PY_DCT
}
 Serialization payload types. More...
 

Functions

char * ldoc_py2str (PyObject *obj)
 Returns a string representation of a Python object (Python call "str(obj)"). More…
 
ldoc_doc_t * ldoc_pydict2doc (PyObject *dict)
 Turns a Python dictionary into a LibDocument document. More…
 
ldoc_ser_t * ldoc_ser_new (ldoc_serpld_t tpe)
 Allocates a new serialization object. More…
 
void ldoc_ser_free (ldoc_ser_t *ser)
 Frees the memory of a serialization object. More…
 
ldoc_vis_nde_ord_t * ldoc_vis_nde_ord_new ()
 Allocates a new node visitor object. More…
 
void ldoc_vis_nde_ord_free (ldoc_vis_nde_ord_t *vis_nde)
 Frees the memory of a node visitor object. More…
 
ldoc_vis_ent_t * ldoc_vis_ent_new ()
 Allocates a new entity visitor object. More…
 
void ldoc_vis_ent_free (ldoc_vis_ent_t *vis_ent)
 Frees the memory of an entity visitor object. More…
 
void ldoc_vis_nde_uni (ldoc_vis_nde_t *vis, ldoc_ser_t *(*vis_uni)(ldoc_nde_t *nde, ldoc_coord_t *coord))
 Uniformly populates a node visitor object with a single visitor function. More…
 
void ldoc_vis_ent_uni (ldoc_vis_ent_t *vis, ldoc_ser_t *(*vis_uni)(ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord))
 Uniformly populates an entity visitor object with a single visitor function. More…
 
ldoc_pos_t * ldoc_pos_new (ldoc_nde_t *nde, uint64_t nde_off, uint64_t off)
 Creates a new position object. More…
 
void ldoc_pos_free (ldoc_pos_t *pos)
 
ldoc_doc_t * ldoc_doc_new ()
 Create a new document. More…
 
void ldoc_doc_free (ldoc_doc_t *doc)
 Releases all allocated memory of a document (including nodes and entities). More…
 
ldoc_ent_t * ldoc_ent_new (ldoc_content_t tpe)
 Creates a new entity object. More…
 
void ldoc_ent_free (ldoc_ent_t *ent)
 Frees the memory of an entity object. More…
 
ldoc_nde_t * ldoc_nde_new (ldoc_struct_t tpe)
 Creates a new node object. More…
 
void ldoc_nde_free (ldoc_nde_t *nde)
 Deletes a node, its associated entities, and its descendant nodes. More…
 
void ldoc_nde_ent_push (ldoc_nde_t *nde, ldoc_ent_t *ent)
 Add an entity to the end of a node's entity list. More…
 
ldoc_ent_t * ldoc_nde_ent_pop (ldoc_nde_t *nde)
 Remove an entity at the end of a node's entity list. More…
 
void ldoc_nde_ent_shift (ldoc_nde_t *nde, ldoc_ent_t *ent)
 Add an entity to the beginning of a node's entity list. More…
 
ldoc_ent_t * ldoc_nde_ent_unshift (ldoc_nde_t *nde)
 Remove an entity from the beginning of a node's entity list. More…
 
void ldoc_nde_ent_ins (ldoc_ent_t *ent, ldoc_ent_t *ent_ref)
 Insert an entity before another entity in a node's entity list. More…
 
void ldoc_ent_rm (ldoc_ent_t *ent)
 Remove an entity from a document tree. More…
 
void ldoc_nde_dsc_push (ldoc_nde_t *nde, ldoc_nde_t *dsc)
 Add a node to the end of a node's descendant list. More…
 
ldoc_nde_t * ldoc_nde_dsc_pop (ldoc_nde_t *nde)
 Remove a node at the end of a node's descendant list. More…
 
void ldoc_nde_dsc_shift (ldoc_nde_t *nde, ldoc_nde_t *dsc)
 Add a node to the beginning of a node's descendant list. More…
 
ldoc_nde_t * ldoc_nde_dsc_unshift (ldoc_nde_t *nde)
 Remove a node from the beginning of a node's descendant list. More…
 
void ldoc_nde_dsc_ins (ldoc_nde_t *dsc, ldoc_nde_t *nde_ref)
 Insert a node before another node in a node's descendant list. More…
 
void ldoc_nde_rm (ldoc_nde_t *nde)
 Remove a node and its descendants from a document tree. More…
 
uint16_t ldoc_nde_lvl (ldoc_nde_t *nde)
 Returns the level at which a node is located. More…
 
ldoc_ser_t * ldoc_format (ldoc_doc_t *doc, ldoc_vis_nde_ord_t *vis_nde, ldoc_vis_ent_t *vis_ent)
 Formats (serializes) a document using a set of node and entity visitors. More…
 
ldoc_ser_t * ldoc_format_json (ldoc_doc_t *doc)
 Format a document as an object in JSON. More…
 
ldoc_res_t * ldoc_find_anno_ent (ldoc_nde_t *nde, char *leaf)
 Find an entity object based on its annotation. More…
 
ldoc_res_t * ldoc_find_anno_nde (ldoc_nde_t *nde, char **pth, size_t plen)
 Find a node or entity based on a given search path. More…
 
ldoc_res_t * ldoc_find_anno (ldoc_doc_t *doc, char **pth, size_t plen)
 Find a node or entity in a document based on a given search path. More…
 
void ldoc_res_free (ldoc_res_t *res)
 Frees a result object. More…
 
ldoc_pos_t * ldoc_find_pos (ldoc_doc_t *doc, uint64_t off)
 Given a zero-based cursor position that is counted from the beginning of a document, returns the document node and offset within the node that entails the cursor position. More…
 
ldoc_pos_t * ldoc_find_kw (ldoc_doc_t *doc, uint64_t off, char *str)
 Given a zero-based cursor position that is counted from the beginning of a document as well as a search string, returns the most detailed node that captures the first match of the search string that occurs on or after the cursor position. More…
 
ldoc_ser_t * ldoc_vis_setup_html (void)
 Setup for HTML serialization. More…
 
ldoc_ser_t * ldoc_vis_teardown_html (void)
 Teardown for HTML serialization. More…
 
ldoc_ser_t * ldoc_vis_nde_pre_html (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for HTML serialization (prefix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_infx_html (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for HTML serialization (infix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_post_html (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for HTML serialization (postfix traversal). More…
 
ldoc_ser_t * ldoc_vis_ent_html (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)
 Entity visitor for HTML serialization. More…
 
ldoc_ser_t * ldoc_vis_setup_json (void)
 Setup for JSON serialization. More…
 
ldoc_ser_t * ldoc_vis_teardown_json (void)
 Teardown for JSON serialization. More…
 
ldoc_ser_t * ldoc_vis_nde_pre_json (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for JSON serialization (prefix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_infx_json (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for JSON serialization (infix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_post_json (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for JSON serialization (postfix traversal). More…
 
ldoc_ser_t * ldoc_vis_ent_json (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)
 Entity visitor for JSON serialization. More…
 
ldoc_ser_t * ldoc_vis_setup_py (void)
 Setup for Python object serialization. More…
 
ldoc_ser_t * ldoc_vis_teardown_py (void)
 Teardown for Python object serialization. More…
 
ldoc_ser_t * ldoc_vis_nde_pre_py (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for Python object serialization (prefix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_infx_py (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for Python object serialization (infix traversal). More…
 
ldoc_ser_t * ldoc_vis_nde_post_py (ldoc_nde_t *nde, ldoc_coord_t *coord)
 Node visitor for Python object serialization (postfix traversal). More…
 
ldoc_ser_t * ldoc_vis_ent_py (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)
 Entity visitor for Python object serialization. More…
 

Variables

ldoc_doc_t * LDOC_DOC_NULL
 Null pointer for document objects. More…
 
ldoc_ser_t * LDOC_SER_NULL
 Null pointer for serialization objects. More…
 
ldoc_pos_t * LDOC_POS_NULL
 Null pointer for cursor objects. More…
 
ldoc_nde_t * LDOC_NDE_NULL
 Null pointer for node objects. More…
 
ldoc_ent_t * LDOC_ENT_NULL
 Null pointer for entity objects. More…
 
ldoc_res_t * LDOC_RES_NULL
 Null pointer for result objects. More…
 
ldoc_doc_anno_t LDOC_ANNO_NULL
 Null pointer for annotation objects. More…
 

Detailed Description

Functions and data structures for creating annotated documents that are treelike (cross references permitted). Serialization functions for HTML, JSON, and Python objects.

Macro Definition Documentation

#define LDOC_QSTK_FULL(qstk)   (qstk->wptr + 1) % qstk->max == qstk->rptr

#define LDOC_QSTK_NDE_ADDR(qstk, n)   sizeof(ldoc_nde_t*) * (n)

Typedef Documentation

typedef struct ldoc_coord_t ldoc_coord_t

Coordinate within a document.

The coordinate level lvl denotes the distance to the root node, whilst the plane pln denotes the horizontal position within a level.

typedef struct ldoc_doc_anno_t ldoc_doc_anno_t

Document annotations (payloads of datum and annotation assignments).

Contains document annotations that are a pair: a datum payload and a (datum) annotation payload.

typedef struct ldoc_doc_t ldoc_doc_t

Document structure.

typedef struct ldoc_ent_t ldoc_ent_t

Document entity: a concrete piece of data that is part of a document.

A document entity is an actual representation of tangible data in a document. The entity is typed (tpe); applicable types are defined by ldoc_content_t. Data is accessed as payload (pld), whose data type depends on the entity type.

Example: a primary heading is encoded as tpe set to ldoc_content_t.H1 and ldoc_set_t.str payload set to "Heading Example".

typedef struct ldoc_nde_qstk_t ldoc_nde_qstk_t

Node quick-stack for efficiently handling node allocations/deallocations.

rptr points to a node, if unequal to wptr, or the empty stack otherwise (rptr equals wptr). wptr always points to an empty node.

typedef struct ldoc_nde_t ldoc_nde_t

A document node.

A document node that itself can be marked up with an annotation and which has exactly one parent, zero or more node descendants, and zero or more entities associated with it (the latter contain data pertaining to the node).

Note: A node can hold information about multiple entities, but a document has to follow a strict hierarchical structure. Markup overlaps need to be encoded via separate entities.

typedef struct ldoc_pos_t ldoc_pos_t

A cursor position within a node.

This is a zero-based cursor that points between character positions of a node (offset relative to node contents).

typedef struct ldoc_py_t ldoc_py_t

Python representation of ldoc_doc_anno_t.

Note: Removed when LDOC_NOPYTHON is defined.

Python objects that contain document annotation pairs; see also ldoc_doc_anno_t.

typedef struct ldoc_raw_t ldoc_raw_t

Raw data structure (payload pointer and data length).

Raw data representation with a payload pointer and data length in bytes.

typedef struct ldoc_res_t ldoc_res_t

Entity or node returned by a search for a specific annotation.

typedef struct ldoc_ser_t ldoc_ser_t

Serialization data structure.

A data serialization structure that is an aggregate of data payload and data type.

typedef struct ldoc_vis_ent_t ldoc_vis_ent_t

An entity visitor object calls the given visitor functions based on entity type.

Note: This structure and the enumerations ldoc_struct_t and ldoc_content_t go hand-in-hand. Each content type can have its own visitor, but it is also possible to re-use a single function for all content types.

typedef struct ldoc_vis_nde_ord_t ldoc_vis_nde_ord_t

Ordered visitor structure for setting up serialization, tearing it down, and pre-, in-, and postfix visits.

typedef struct ldoc_vis_nde_t ldoc_vis_nde_t

A node visitor object calls the given visitor functions based on node type.

Note: This structure and the enumerations ldoc_struct_t and ldoc_content_t go hand-in-hand. Each content type can have its own visitor, but it is also possible to re-use a single function for all content types.

Enumeration Type Documentation

enum ldoc_content_t

Entity types denoting sequentially ordered, but essentially "flat", node contents.

Entities are children of nodes and represent node contents. The type of their parent node determines how they are to be interpreted, but entity types themselves permit more fine-grained control.

Enumerator

LDOC_ENT_TXT

Most general entity type; plain text.

LDOC_ENT_EM1

Emphasized text (type 1 of 2).

LDOC_ENT_EM2

Emphasized text (type 2 of 2).

LDOC_ENT_URI

An arbitrary URI.

LDOC_ENT_REF

A "reference", points to an anchor node (LDOC_NDE_ANC).

LDOC_ENT_NUM

A "number"; integer, real, or floating point.

LDOC_ENT_BL

A "boolean"; truth value.

LDOC_ENT_BR

A "boolean reference" (for example, "citation": true).

LDOC_ENT_NR

A "numbered reference" (for example, "citation": 12).

LDOC_ENT_OR

An "ontology reference"; base determined by an ontology object node (LDOC_NDE_OO).

enum ldoc_serpld_t

Serialization payload types.

Payload types for serializing documents.

Enumerator

LDOC_SER_CSTR

Null-terminated string.

LDOC_SER_PY_INT

Python integer.

LDOC_SER_PY_BL

Python boolean.

LDOC_SER_PY_FLT

Python float.

LDOC_SER_PY_STR

Python string.

LDOC_SER_PY_LST

Python list.

LDOC_SER_PY_DCT

Python dictionary.

enum ldoc_struct_t

Node types denoting the document content hierarchy and atomic concepts.

Nodes provide the main structure of a document. Their type indicates how child nodes and child entities are to be interpreted.

Enumerator

LDOC_NDE_RT

Document "root", cannot be instantiated manually.

LDOC_NDE_UA

An "unassigned" node type; for use when no other node type fits.

LDOC_NDE_H1

A "header 1", top-most header, largest header.

LDOC_NDE_H2

A "header 2", second largest header.

LDOC_NDE_H3

A "header 3", third largest header.

LDOC_NDE_H4

A "header 4", fourth largest header.

LDOC_NDE_H5

A "header 5", fifth largest header.

LDOC_NDE_H6

A "header 6", sixth largest header, smallest header.

LDOC_NDE_PAR

A node that denotes a "paragraph".

LDOC_NDE_UL

An "unordered list".

LDOC_NDE_OL

An "ordered list".

LDOC_NDE_ANC

An "anchor" within a document that is named by a single entity; linked to by a reference entity (LDOC_ENT_REF).

LDOC_NDE_NN

A "named node".

LDOC_NDE_OO

An "ontology object"; resolves ontology references (LDOC_ENT_OR).

Function Documentation

void ldoc_doc_free (ldoc_doc_t *doc)

Releases all allocated memory of a document (including nodes and entities).

Parameters and Return Value
doc
Document whose memory is being released.

ldoc_doc_t* ldoc_doc_new ()

Create a new document.

Parameters and Return Value
Returns
An empty document with a root node (LDOC_NDE_RT).

void ldoc_ent_free (ldoc_ent_t *ent)

Frees the memory of an entity object.

Note: Does not release memory of the payload.

Parameters and Return Value
ent
Entity object whose memory is being released.

ldoc_ent_t* ldoc_ent_new (ldoc_content_t tpe)

Creates a new entity object.

Parameters and Return Value
tpe
Entity type.
Returns
A new entity object with the payload set to NULL.

void ldoc_ent_rm (ldoc_ent_t *ent)

Remove an entity from a document tree.

Parameters and Return Value
ent
Entity that is being removed from a document tree.

ldoc_res_t* ldoc_find_anno (ldoc_doc_t *doc, char **pth, size_t plen)

Find a node or entity in a document based on a given search path.

Example: Searching for an entity annotation "name" in a document doc.

const char* pth[] = { "address", "name" };
ldoc_res_t* res = ldoc_find_anno(doc, pth, 2);
// Do something with `res`.
ldoc_res_free(res);
Parameters and Return Value
doc
Document object that is being searched.
pth
Search path (array of strings).
plen
Length of the search path pth.
Returns
Result object with a node or entity that matches the search, or LDOC_RES_NULL otherwise.

ldoc_res_t* ldoc_find_anno_ent (ldoc_nde_t *nde, char *leaf)

Find an entity object based on its annotation.

Example: Searching for an entity annotation "name" in a document doc.

ldoc_res_t* res = ldoc_find_anno_ent(doc->rt, "name");
// Do something with `res`.
ldoc_res_free(res);
Parameters and Return Value
nde
Node object at which the search starts.
leaf
Annotation to search for.
Returns
Result object with the entity that matches the search, or LDOC_RES_NULL otherwise.

ldoc_res_t* ldoc_find_anno_nde (ldoc_nde_t *nde, char **pth, size_t plen)

Find a node or entity based on a given search path.

Example: Searching for an entity annotation "name" in a document doc.

const char* pth[] = { "address", "name" };
ldoc_res_t* res = ldoc_find_anno_nde(doc->rt, pth, 2);
// Do something with `res`.
ldoc_res_free(res);
Parameters and Return Value
nde
Node object at which the search starts.
pth
Search path (array of strings).
plen
Length of the search path pth.
Returns
Result object with a node or entity that matches the search, or LDOC_RES_NULL otherwise.

ldoc_pos_t* ldoc_find_kw (ldoc_doc_t *doc, uint64_t off, char *str)

Given a zero-based cursor position that is counted from the beginning of a document as well as a search string, returns the most detailed node that captures the first match of the search string that occurs on or after the cursor position.

The cursor offset is zero-based and addresses the position between two characters. Cursor position 0 is the start of the text, i.e., the position before the first character (if any) that appears in a document. If there are n characters in the document, then position n refers to the end of the text.

Note: Currently performs very slowly on extremely long texts. Needs optimization.

ldoc_pos_t* ldoc_find_pos (ldoc_doc_t *doc, uint64_t off)

Given a zero-based cursor position that is counted from the beginning of a document, returns the document node and offset within the node that entails the cursor position.

Will return the deepest node (most specialized; furthest down the tree) for the given cursor position. If the cursor position falls on a boundary – the position between two adjacent nodes – then the first node will be returned (the node for which the cursor position denotes the end of the node, rather than the beginning of the following node).

The cursor offset is zero-based and addresses the position between two characters. Cursor position 0 is the start of the text, i.e., the position before the first character (if any) that appears in a document. If there are n characters in the document, then position n refers to the end of the text.

Note: Currently performs very slowly on extremely long texts. Needs optimization.

ldoc_ser_t* ldoc_format (ldoc_doc_t *doc, ldoc_vis_nde_ord_t *vis_nde, ldoc_vis_ent_t *vis_ent)

Formats (serializes) a document using a set of node and entity visitors.

Parameters and Return Value
doc
Document that is being serialized.
vis_nde
Node visitors.
vis_ent
Entity visitors.
Returns
Serialization object for document doc based on the visitors vis_nde and vis_ent.

ldoc_ser_t* ldoc_format_json (ldoc_doc_t *doc)

Format a document as an object in JSON.

Parameters and Return Value
doc
Document that is being serialized as a JSON object.
Returns
Serialization object containing the serialized version of doc as JSON object.

void ldoc_nde_dsc_ins (ldoc_nde_t *dsc, ldoc_nde_t *nde_ref)

Insert a node before another node in a node's descendant list.

Parameters and Return Value
dsc
Node object that is being added before nde_ref in the descendant list of the node to which nde_ref belongs.
nde_ref
Node object.

ldoc_nde_t* ldoc_nde_dsc_pop (ldoc_nde_t *nde)

Remove a node at the end of a node's descendant list.

Parameters and Return Value
nde
Node object from which the last descendant node is removed.
Returns
Node that was removed from the descendant list.

void ldoc_nde_dsc_push (ldoc_nde_t *nde, ldoc_nde_t *dsc)

Add a node to the end of a node's descendant list.

Parameters and Return Value
nde
Node object to which node dsc is added as a descendant.
dsc
Node object that is being added as a descendant to node nde.

void ldoc_nde_dsc_shift (ldoc_nde_t *nde, ldoc_nde_t *dsc)

Add a node to the beginning of a node's descendant list.

ldoc_nde_t* ldoc_nde_dsc_unshift (ldoc_nde_t *nde)

Remove a node from the beginning of a node's descendant list.

Parameters and Return Value
nde
Node object from which the first descendant node is removed.
Returns
Node that was removed from the descendant list.

void ldoc_nde_ent_ins (ldoc_ent_t *ent, ldoc_ent_t *ent_ref)

Insert an entity before another entity in a node's entity list.

Parameters and Return Value
ent
Entity object that is being added before ent_ref in the entity list of the node to which ent_ref belongs.
ent_ref
Entity object.

ldoc_ent_t* ldoc_nde_ent_pop (ldoc_nde_t *nde)

Remove an entity at the end of a node's entity list.

Parameters and Return Value
nde
Node from which the entity is removed.
Returns
Entity object that was removed from the node's (nde) entity list.

void ldoc_nde_ent_push (ldoc_nde_t *nde, ldoc_ent_t *ent)

Add an entity to the end of a node's entity list.

Parameters and Return Value
nde
Node to which the entity ent is appended.
ent
Entity that is appended to the entity list of node nde.

void ldoc_nde_ent_shift (ldoc_nde_t *nde, ldoc_ent_t *ent)

Add an entity to the beginning of a node's entity list.

ldoc_ent_t* ldoc_nde_ent_unshift (ldoc_nde_t *nde)

Remove an entity from the beginning of a node's entity list.

Parameters and Return Value
nde
Node from which the entity is removed.
Returns
Entity object that was removed from the node's (nde) entity list.

void ldoc_nde_free (ldoc_nde_t *nde)

Deletes a node, its associated entities, and its descendant nodes.

Note: Does not release memory of the payload.

Parameters and Return Value
nde
Node whose memory is being released.

uint16_t ldoc_nde_lvl (ldoc_nde_t *nde)

Returns the level at which a node is located.

The root node is located at level 0, its immediate descendants are at level 1, and so on.

Parameters and Return Value
nde
Node object for which the level is being determined.
Returns
Level of the node nde.

ldoc_nde_t* ldoc_nde_new (ldoc_struct_t tpe)

Creates a new node object.

Parameters and Return Value
tpe
Node type.
Returns
A new node object with empty entity list and no descendants.

void ldoc_nde_rm (ldoc_nde_t *nde)

Remove a node and its descendants from a document tree.

Parameters and Return Value
nde
Node that is going to be removed from its document tree.

void ldoc_pos_free (ldoc_pos_t *pos)

Releases the memory of a position object.

Parameters and Return Value
pos
Position object whose memory is being released.

ldoc_pos_t* ldoc_pos_new (ldoc_nde_t *nde, uint64_t nde_off, uint64_t off)

Creates a new position object.

Parameters and Return Value
nde
Node that is the root for the position object.
nde_off
Offset to the beginning of nde.
off
Offset within nde.
Returns
A new position object.

char* ldoc_py2str (PyObject *obj)

Returns a string representation of a Python object (Python call "str(obj)").

Note: Removed when LDOC_NOPYTHON is defined.

Parameters and Return Value
obj
Python object.
Returns
Null terminated string representation of obj.

ldoc_doc_t* ldoc_pydict2doc (PyObject *dict)

Turns a Python dictionary into a LibDocument document.

Note: Removed when LDOC_NOPYTHON is defined.

Parameters and Return Value
dict
Python dictionary.
Returns
Document representation of dict.

void ldoc_res_free (ldoc_res_t *res)

Frees a result object.

Parameters and Return Value
res
Result object whose memory is being released.

void ldoc_ser_free (ldoc_ser_t *ser)

Frees the memory of a serialization object.

Parameters and Return Value
ser
Serialization object whose memory is being released.

ldoc_ser_t* ldoc_ser_new (ldoc_serpld_t tpe)

Allocates a new serialization object.

Parameters and Return Value
tpe
Serialization type (string, integer, float, etc.).
Returns
A new serialization object with type tpe, but empty (unassigned) payload.

void ldoc_vis_ent_free (ldoc_vis_ent_t *vis_ent)

Frees the memory of an entity visitor object.

Parameters and Return Value
vis_ent
Entity visitor structure whose memory will be released.

ldoc_ser_t* ldoc_vis_ent_html (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)

Entity visitor for HTML serialization.

Parameters and Return Value
nde
Node object context in which the entity ent is being serialized.
ent
Entity that is being serialized as HTML.
coord
Coordinate object that determines the entity's (ent) position on a document level.
Returns
A serialization object for HTML serialization.

ldoc_ser_t* ldoc_vis_ent_json (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)

Entity visitor for JSON serialization.

Parameters and Return Value
nde
Node object context in which the entity ent is being serialized.
ent
Entity that is being serialized as JSON.
coord
Coordinate object that determines the entity's (ent) position on a document level.
Returns
A serialization object for JSON serialization.

ldoc_vis_ent_t* ldoc_vis_ent_new ()

Allocates a new entity visitor object.

Parameters and Return Value
Returns
A new entity visitor object (functions unassigned).

ldoc_ser_t* ldoc_vis_ent_py (ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord)

Entity visitor for Python object serialization.

Parameters and Return Value
nde
Node object context in which the entity ent is being serialized.
ent
Entity that is being serialized as Python object.
coord
Coordinate object that determines the entity's (ent) position on a document level.
Returns
A serialization object for Python object serialization.

void ldoc_vis_ent_uni (ldoc_vis_ent_t *vis, ldoc_ser_t *(*vis_uni)(ldoc_nde_t *nde, ldoc_ent_t *ent, ldoc_coord_t *coord))

Uniformly populates an entity visitor object with a single visitor function.

Parameters and Return Value
vis
Visitor object whose function pointers are being set to vis_uni.
vis_uni
Visitor function that is being assigned to all function pointers in vis.

ldoc_ser_t* ldoc_vis_nde_infx_html (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for HTML serialization (infix traversal).

Parameters and Return Value
nde
Node object that is being serialized as HTML.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for HTML serialization.

ldoc_ser_t* ldoc_vis_nde_infx_json (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for JSON serialization (infix traversal).

Parameters and Return Value
nde
Node object that is being serialized as JSON.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for JSON serialization.

ldoc_ser_t* ldoc_vis_nde_infx_py (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for Python object serialization (infix traversal).

Parameters and Return Value
nde
Node object that is being serialized as Python object.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for Python object serialization.

void ldoc_vis_nde_ord_free (ldoc_vis_nde_ord_t *vis_nde)

Frees the memory of a node visitor object.

Parameters and Return Value
vis_nde
Visitor object whose memory is being released.

ldoc_vis_nde_ord_t* ldoc_vis_nde_ord_new ()

Allocates a new node visitor object.

Parameters and Return Value
Returns
A new visitor object (functions unassigned).

ldoc_ser_t* ldoc_vis_nde_post_html (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for HTML serialization (postfix traversal).

Parameters and Return Value
nde
Node object that is being serialized as HTML.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for HTML serialization.

ldoc_ser_t* ldoc_vis_nde_post_json (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for JSON serialization (postfix traversal).

Parameters and Return Value
nde
Node object that is being serialized as JSON.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for JSON serialization.

ldoc_ser_t* ldoc_vis_nde_post_py (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for Python object serialization (postfix traversal).

Parameters and Return Value
nde
Node object that is being serialized as Python object.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for Python object serialization.

ldoc_ser_t* ldoc_vis_nde_pre_html (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for HTML serialization (prefix traversal).

Parameters and Return Value
nde
Node object that is being serialized as HTML.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for HTML serialization.

ldoc_ser_t* ldoc_vis_nde_pre_json (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for JSON serialization (prefix traversal).

Parameters and Return Value
nde
Node object that is being serialized as JSON.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for JSON serialization.

ldoc_ser_t* ldoc_vis_nde_pre_py (ldoc_nde_t *nde, ldoc_coord_t *coord)

Node visitor for Python object serialization (prefix traversal).

Parameters and Return Value
nde
Node object that is being serialized as Python object.
coord
Coordinate object that determines the node's (nde) position on a document level.
Returns
A serialization object for Python object serialization.

void ldoc_vis_nde_uni (ldoc_vis_nde_t *vis, ldoc_ser_t *(*vis_uni)(ldoc_nde_t *nde, ldoc_coord_t *coord))

Uniformly populates a node visitor object with a single visitor function.

Parameters and Return Value
vis
Visitor object whose function pointers are being set to vis_uni.
vis_uni
Visitor function that is being assigned to all function pointers in vis.

ldoc_ser_t* ldoc_vis_setup_html (void )

Setup for HTML serialization.

Parameters and Return Value
Returns
A serialization object initialized for HTML serialization.

ldoc_ser_t* ldoc_vis_setup_json (void )

Setup for JSON serialization.

Parameters and Return Value
Returns
A serialization object initialized for JSON serialization.

ldoc_ser_t* ldoc_vis_setup_py (void )

Setup for Python object serialization.

Parameters and Return Value
Returns
A serialization object initialized for Python object serialization.

ldoc_ser_t* ldoc_vis_teardown_html (void )

Teardown for HTML serialization.

Parameters and Return Value
Returns
A serialization object with closing statements for HTML serialization.

ldoc_ser_t* ldoc_vis_teardown_json (void )

Teardown for JSON serialization.

Parameters and Return Value
Returns
A serialization object with closing statements for JSON serialization.

ldoc_ser_t* ldoc_vis_teardown_py (void )

Teardown for Python object serialization.

Parameters and Return Value
Returns
A serialization object with closing statements for Python object serialization.

Variable Documentation

ldoc_doc_anno_t LDOC_ANNO_NULL

Null pointer for annotation objects.

ldoc_doc_t* LDOC_DOC_NULL

Null pointer for document objects.

ldoc_ent_t* LDOC_ENT_NULL

Null pointer for entity objects.

ldoc_nde_t* LDOC_NDE_NULL

Null pointer for node objects.

ldoc_pos_t* LDOC_POS_NULL

Null pointer for cursor objects.

ldoc_res_t* LDOC_RES_NULL

Null pointer for result objects.

ldoc_ser_t* LDOC_SER_NULL

Null pointer for serialization objects.