Prepare html document to see full text.

html(object, ...)

# S4 method for character
html(object, corpus, height = NULL)

# S4 method for partition
html(
  object,
  meta = NULL,
  cpos = TRUE,
  verbose = FALSE,
  cutoff = NULL,
  charoffset = FALSE,
  beautify = TRUE,
  height = NULL,
  ...
)

# S4 method for subcorpus
html(
  object,
  meta = NULL,
  cpos = TRUE,
  verbose = FALSE,
  cutoff = NULL,
  charoffset = FALSE,
  beautify = FALSE,
  height = NULL,
  ...
)

# S4 method for partition_bundle
html(
  object,
  charoffset = FALSE,
  beautify = TRUE,
  height = NULL,
  progress = TRUE,
  ...
)

# S4 method for kwic
html(object, i, s_attribute = NULL, type = NULL, verbose = FALSE, ...)

Arguments

object

The object the fulltext output will be based on.

...

Further parameters that are passed into as.markdown.

corpus

The ID of the corpus, a length-one character vector.

height

A character vector that will be inserted into the html as an optional height of a scroll box.

meta

Metadata to include in the output. If NULL (default), the s-attributes defining the partition will be used.

cpos

Length-one logical value, if TRUE (default), all tokens will be wrapped by elements with id attribute indicating corpus positions.

verbose

Length-one logical value, whether to output progress messages.

cutoff

An integer value, maximum number of tokens to decode from token stream, passed into as.markdown.

charoffset

Length-one logical value, if TRUE, character offset positions are added to elements embracing tokens.

beautify

Length-one logical value, if TRUE, whitespace before punctuation marks will be removed.

progress

Length-one logical value, whether to output a progress bar.

i

An integer value: If object is a kwic-object, the index of the concordance for which the fulltext is to be generated.

s_attribute

Structural attributes that will be used to define the partition where the match occurred.

type

The partition type.

Value

Returns an object of class html as used in the htmltools package. Methods such as htmltools::html_print will be available. The encoding of the html document will be UTF-8 on all systems (including Windows).

Details

If the argument charoffset is TRUE, character offset positions will be added to the tags that embrace tokens. This may be useful if the exported html document is annotated with a tool that stores annotations with character offset positions.

Examples

use("polmineR")
#> ... activating corpus: GERMAPARLMINI (version: 0.0.1 | build date: 2019-02-23)
#> ... activating corpus: REUTERS
P <- partition("REUTERS", places = "argentina")
#> ... get encoding: latin1
#> ... get cpos and strucs
H <- html(P)
if (interactive()) H # show full text in viewer pane
# html-method can be used in a pipe
H <- partition("REUTERS", places = "argentina") %>% html()
#> ... get encoding: latin1
#> ... get cpos and strucs
# use html-method to get full text where concordance occurs
K <- kwic("REUTERS", query = "barrels")
H <- html(K, i = 1, s_attribute = "id")
#> ... get encoding: latin1
#> ... get cpos and strucs
H <- html(K, i = 2, s_attribute = "id")
#> ... get encoding: latin1
#> ... get cpos and strucs
for (i in 1L:length(K)) {
  H <- html(K, i = i, s_attribute = "id")
  if (interactive()){
    show(H)
    userinput <- readline("press 'q' to quit or any other key to continue")
    if (userinput == "q") break
  }
}
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs
#> ... get encoding: latin1
#> ... get cpos and strucs