Get cooccurrence statistics.

cooccurrences(.Object, ...)

# S4 method for corpus
cooccurrences(
  .Object,
  query,
  cqp = is.cqp,
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  stoplist = NULL,
  positivelist = NULL,
  regex = FALSE,
  keep = NULL,
  cpos = NULL,
  method = "ll",
  mc = getOption("polmineR.mc"),
  verbose = FALSE,
  progress = FALSE,
  ...
)

# S4 method for character
cooccurrences(
  .Object,
  query,
  cqp = is.cqp,
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  stoplist = NULL,
  positivelist = NULL,
  regex = FALSE,
  keep = NULL,
  cpos = NULL,
  method = "ll",
  mc = getOption("polmineR.mc"),
  verbose = FALSE,
  progress = FALSE,
  ...
)

# S4 method for slice
cooccurrences(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

# S4 method for partition
cooccurrences(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

# S4 method for subcorpus
cooccurrences(
  .Object,
  query,
  cqp = is.cqp,
  left = getOption("polmineR.left"),
  right = getOption("polmineR.right"),
  p_attribute = getOption("polmineR.p_attribute"),
  boundary = NULL,
  stoplist = NULL,
  positivelist = NULL,
  keep = NULL,
  method = "ll",
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

# S4 method for context
cooccurrences(.Object, method = "ll", verbose = FALSE)

# S4 method for partition_bundle
cooccurrences(.Object, query, mc = getOption("polmineR.mc"), ...)

# S4 method for Cooccurrences
cooccurrences(.Object, query)

# S4 method for remote_corpus
cooccurrences(.Object, ...)

# S4 method for remote_subcorpus
cooccurrences(.Object, ...)

Arguments

.Object

A partition object, or a character vector with a CWB corpus.

...

Further parameters that will be passed into bigmatrix (applies only if big = TRUE).

query

A query, either a character vector to match a token, or a CQP query.

cqp

Defaults to is.cqp-function, or provide TRUE/FALSE; relevant only if query is not NULL.

p_attribute

The p-attribute of the tokens/the query.

boundary

If provided, it will be checked that the corpus positions of windows do not extend beyond the left and right boundaries of the region defined by the s-attribute where the match occurs.

left

Number of tokens to the left of the query match.

right

Number of tokens to the right of the query match.

stoplist

Exclude a query hit from analysis if stopword(s) is/are in context (relevant only if query is not NULL).

positivelist

Character vector or numeric vector: include a query hit only if a token in positivelist is present. If positivelist is a character vector, it is assumed to provide regular expressions (matching takes incredibly long if the list is long). Relevant only if query is not NULL.

regex

A logical value, whether stoplist/positivelist are interpreted as regular expressions.

keep

list with tokens to keep

cpos

An integer vector with corpus positions; defaults to NULL, in which case the corpus positions for the whole corpus will be used.

method

The statistical test(s) to use (defaults to "ll").

mc

whether to use multicore

verbose

A logical value, whether to be verbose.

progress

A logical value, whether to output progress bar.

Value

a cooccurrences-class object

References

Baker, Paul (2006): Using Corpora in Discourse Analysis. London: Continuum, pp. 95-120 (ch. 5).

Manning, Christopher D.; Schuetze, Hinrich (1999): Foundations of Statistical Natural Language Processing. MIT Press: Cambridge, Mass., pp. 151-189 (ch. 5).

See also

See the documentation for the ll-method for an explanation of the computation of the log-likelihood statistic.

Author

Andreas Blaette

Examples

use("polmineR")
#> ... activating corpus: GERMAPARLMINI (version: 0.0.1 | build date: 2019-02-23)
#> ... activating corpus: REUTERS
merkel <- partition("GERMAPARLMINI", interjection = "speech", speaker = ".*Merkel", regex = TRUE)
#> ... get encoding: latin1
#> ... get cpos and strucs
merkel <- enrich(merkel, p_attribute = "word")
#> ... getting counts for p-attribute(s): word
cooc <- cooccurrences(merkel, query = "Deutschland")

# use subset-method to filter results
a <- cooccurrences("REUTERS", query = "oil")
b <- subset(a, !is.na(ll))
c <- subset(b, !word %in% tm::stopwords("en"))
d <- subset(c, count_coi >= 5)
e <- subset(c, ll >= 10.83)
format(e)
#>        word count_coi count_ref exp_coi exp_ref    ll rank_ll
#> 1:   prices        27        20    9.23   37.77 33.05       1
#> 2:    crude        13         7    3.93   16.07 19.62       2
#> 3: industry         8         2    1.96    8.04 16.97       3
#> 4:   recent         5         1    1.18    4.82 11.33       4
# using pipe operator may be convenient
if (require(magrittr)){
  cooccurrences("REUTERS", query = "oil") %>%
    subset(!is.na(ll)) %>%
    subset(!word %in% tm::stopwords("en")) %>%
    subset(count_coi >= 5) %>%
    subset(ll >= 10.83) %>%
    format()
}
#> Loading required package: magrittr
#>        word count_coi count_ref exp_coi exp_ref    ll rank_ll
#> 1:   prices        27        20    9.23   37.77 33.05       1
#> 2:    crude        13         7    3.93   16.07 19.62       2
#> 3: industry         8         2    1.96    8.04 16.97       3
#> 4:   recent         5         1    1.18    4.82 11.33       4
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")
pb_min <- pb[[ count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]] ]]
y <- cooccurrences(pb_min, query = "Deutschland")
if (interactive()) y[[1]]
if (interactive()) y[[2]]

y2 <- corpus("GERMAPARLMINI") %>%
  subset(speaker %in% c("Hubertus Heil", "Angela Dorothea Merkel")) %>%
  split(s_attribute = "speaker") %>%
  cooccurrences(query = "Deutschland")