Get the total number of unique tokens/ids of a positional attribute. Note
that token ids are zero-based: when iterating through tokens, start at 0;
the maximum id is cl_lexicon_size() minus 1.
cl_lexicon_size(corpus, p_attribute, registry = Sys.getenv("CORPUS_REGISTRY"))
corpus: name of a CWB corpus (upper case)
p_attribute: name of positional attribute
registry: path to the registry directory; defaults to the value of the environment variable CORPUS_REGISTRY
lexicon_size <- cl_lexicon_size(
"REUTERS",
p_attribute = "word",
registry = get_tmp_registry()
)
token_ids <- seq.int(from = 0, to = lexicon_size - 1)
cl_id2str(
"REUTERS",
p_attribute = "word",
id = token_ids,
registry = get_tmp_registry()
)
#> [1] "Diamond" "Shamrock" "Corp" "said"
#> [5] "that" "effective" "today" "it"
#> [9] "had" "cut" "its" "contract"
#> [13] "prices" "for" "crude" "oil"
#> [17] "by" "1.50" "dlrs" "a"
#> [21] "barrel" "The" "reduction" "brings"
#> [25] "posted" "price" "West" "Texas"
#> [29] "Intermediate" "to" "16.00" "the"
#> [33] "copany" "was" "made" "in"
#> [37] "light" "of" "falling" "product"
#> [41] "and" "weak" "market" "company"
#> [45] "spokeswoman" "is" "latest" "line"
#> [49] "U.S" "companies" "have" "or"
#> [53] "over" "last" "two" "days"
#> [57] "citing" "markets" "Reuter" "OPEC"
#> [61] "may" "be" "forced" "meet"
#> [65] "before" "scheduled" "June" "session"
#> [69] "readdress" "production" "cutting" "agreement"
#> [73] "if" "organization" "wants" "halt"
#> [77] "current" "slide" "industry" "analysts"
#> [81] "movement" "higher" "never" "as"
#> [85] "easy" "thought" "They" "need"
#> [89] "an" "emergency" "meeting" "sort"
#> [93] "out" "problems" "Daniel" "Yergin"
#> [97] "director" "Cambridge" "Energy" "Research"
#> [101] "Associates" "CERA" "Analysts" "sources"
#> [105] "problem" "faces" "excess" "supply"
#> [109] "world" "OPEC's" "not" "but"
#> [113] "issue" "must" "addressed" "way"
#> [117] "Paul" "Mlotok" "analyst" "with"
#> [121] "Salomon" "Brothers" "Inc" "He"
#> [125] "market's" "earlier" "optimism" "about"
#> [129] "ability" "keep" "under" "control"
#> [133] "given" "pessimistic" "outlook" "address"
#> [137] "soon" "wishes" "regain" "initiative"
#> [141] "But" "some" "other" "were"
#> [145] "uncertain" "even" "would" "above"
#> [149] "15.8" "mln" "bpd" "quota"
#> [153] "set" "December" "has" "learn"
#> [157] "buyers" "you" "cannot" "deemed"
#> [161] "quotas" "fixed" "differentials" "regional"
#> [165] "manager" "one" "major" "who"
#> [169] "spoke" "on" "condition" "he"
#> [173] "named" "now" "trying" "teach"
#> [177] "them" "lesson" "again" "added"
#> [181] "David" "T" "Mizrahi" "editor"
#> [185] "Mideast" "reports" "expects" "although"
#> [189] "immediately" "However" "optimistic" "can"
#> [193] "principal" "will" "they" "try"
#> [197] "take" "advantage" "winter" "demand"
#> [201] "sell" "their" "late" "March"
#> [205] "April" "when" "slackens" "unlikely"
#> [209] "do" "anything" "more" "than"
#> [213] "reiterate" "output" "at" "next"
#> [217] "months" "critical" "hold" "together"
#> [221] "pact" "six" "eight" "weeks"
#> [225] "since" "come" "back" "into"
#> [229] "then" "Dillard" "Spriggs" "Petroleum"
#> [233] "Analysis" "Ltd" "New" "York"
#> [237] "Bijan" "Moussavar" "Rahmani" "Harvard"
#> [241] "University's" "Environment" "Policy" "Center"
#> [245] "been" "rising" "through" "first"
#> [249] "quarter" "this" "prompted" "excesses"
#> [253] "Demand" "clearly" "probably" "closer"
#> [257] "17" "so" "what" "we"
#> [261] "are" "seeing" "characterized" "cheating"
#> [265] "told" "Reuters" "telephone" "interview"
#> [269] "Texaco" "Canada" "lowered" "pay"
#> [273] "64" "Canadian" "cts" "decrease"
#> [277] "company's" "benchmark" "grade" "Edmonton"
#> [281] "Swann" "Hills" "Light" "Sweet"
#> [285] "22.26" "bbl" "changed" "postings"
#> [289] "Feb" "19" "Marathon" "Co"
#> [293] "reduced" "all" "grades" "dlr"
#> [297] "Marathon's" "both" "Sour" "16.50"
#> [301] "South" "Louisiana" "16.85" "Jan"
#> [305] "12" "Houston" "Oil" "Trust"
#> [309] "independent" "petroleum" "engineers" "completed"
#> [313] "annual" "study" "estimates" "trust's"
#> [317] "future" "net" "revenues" "from"
#> [321] "total" "proved" "reserves" "88"
#> [325] "discounted" "present" "value" "Based"
#> [329] "estimate" "trust" "there" "no"
#> [333] "money" "available" "cash" "distributions"
#> [337] "unitholders" "remainder" "year" "It"
#> [341] "reflect" "44" "pct" "reserve"
#> [345] "39" "compared" "1985" "Kuwait"
#> [349] "s" "Minister" "remarks" "published"
#> [353] "plans" "review" "policies" "after"
#> [357] "recent" "weakness" "Sheikh" "Ali"
#> [361] "al" "Khalifa" "Sabah" "quoted"
#> [365] "local" "daily" "Qabas" "saying"
#> [369] "None" "members" "asked" "such"
#> [373] "denied" "pumping" "948,000" "barrels"
#> [377] "self" "imposed" "limits" "13"
#> [381] "nation" "organisation" "Traders" "international"
#> [385] "producing" "up" "ceiling" "agreed"
#> [389] "Geneva" "United" "Arab" "Emirates"
#> [393] "along" "much" "smaller" "producer"
#> [397] "Ecuador" "among" "those" "1.2"
#> [401] "This" "rumour" "baseless" "based"
#> [405] "which" "exceed" "share" "suppose"
#> [409] "because" "minister" "produce" "4.0"
#> [413] "If" "our" "official" "while"
#> [417] "countries" "suffering" "difficulties" "marketing"
#> [421] "means" "unusually" "clever" "referring"
#> [425] "apparently" "Gulf" "state" "qatar"
#> [429] "selling" "less" "180,000" "285,000"
#> [433] "resisting" "restored" "month" "pegged"
#> [437] "marker" "18" "per" "Prices"
#> [441] "week" "dropped" "lowest" "levels"
#> [445] "almost" "three" "dollars" "below"
#> [449] "high" "also" "delivered" "challenge"
#> [453] "any" "declared" "sold" "Because"
#> [457] "charging" "16.67" "lost" "custom"
#> [461] "did" "elaborate" "guaranteed" "refining"
#> [465] "facilities" "own" "distribution" "network"
#> [469] "abroad" "reaffirmed" "planned" "7"
#> [473] "committee" "postponed" "until" "start"
#> [477] "request" "certain" "body" "deputy"
#> [481] "energy" "Fernando" "Santos" "Alvite"
#> [485] "Wednesday" "his" "debt" "burdened"
#> [489] "country" "wanted" "assign" "lower"
#> [493] "seek" "talks" "opec" "pricing"
#> [497] "Referring" "pressure" "apparent" "reference"
#> [501] "faced" "Qatar" "We" "expected"
#> [505] "continue" "situation" "later" "improve"
#> [509] "REUTER" "Indonesia" "appears" "nearing"
#> [513] "political" "crossroads" "measures" "deregulate"
#> [517] "protected" "economy" "Embassy" "says"
#> [521] "new" "report" "To" "counter"
#> [525] "government" "launched" "series" "past"
#> [529] "nine" "boost" "exports" "outside"
#> [533] "sector" "attract" "investment" "only"
#> [537] "Asian" "member" "leading" "primary"
#> [541] "commodity" "severely" "hit" "fall"
#> [545] "devalue" "currency" "31" "September"
#> [549] "President" "Suharto" "divided" "direction"
#> [553] "lead" "regard" "deregulation" "pertains"
#> [557] "investments" "imports" "primarily" "assesses"
#> [561] "agricultural" "reviews" "general" "economic"
#> [565] "performance" "many" "officials" "advisers"
#> [569] "recommending" "further" "relaxation" "equally"
#> [573] "strong" "pressures" "being" "exerted"
#> [577] "moves" "group" "strongly" "favours"
#> [581] "import" "substitution" "changes" "welcomed"
#> [585] "World" "Bank" "bankers" "steps"
#> [589] "right" "though" "say" "crucial"
#> [593] "areas" "like" "plastics" "steel"
#> [597] "remain" "highly" "virtual" "monopolies"
#> [601] "Three" "sets" "announced" "May"
#> [605] "broadened" "foreign" "trade" "restrictions"
#> [609] "liberalised" "growth" "calendar" "1986"
#> [613] "zero" "contracted" "bit" "rate"
#> [617] "mid" "1960s" "notes" "largest"
#> [621] "East" "Asia" "population" "168"
#> [625] "million" "facing" "elections" "little"
#> [629] "hope" "swift" "improvement" "For"
#> [633] "1987" "early" "indications" "point"
#> [637] "slightly" "positive" "exceeding" "Economic"
#> [641] "activity" "continues" "suffer" "due"
#> [645] "sharp" "export" "earnings" "Growth"
#> [649] "non" "low" "domestic" "coupled"
#> [653] "excessive" "plant" "capacity" "real"
#> [657] "declines" "construction" "level" "agriculture"
#> [661] "states" "Bankers" "continuation" "reforms"
#> [665] "get" "lending" "needs" "A"
#> [669] "loan" "300" "balance" "payments"
#> [673] "support" "partly" "help" "maintain"
#> [677] "momentum" "reform" "Saudi" "riyal"
#> [681] "interbank" "deposits" "steady" "yesterday's"
#> [685] "quiet" "reluctant" "positions" "amidst"
#> [689] "uncertainty" "whether" "succeed" "halting"
#> [693] "decline" "yesterday" "several" "producers"
#> [697] "difficulty" "traditional" "Sunday" "lull"
#> [701] "trading" "European" "weekend" "contributed"
#> [705] "lack" "Spot" "rates" "put"
#> [709] "6" "1" "4" "5"
#> [713] "3" "quotes" "ranging" "between"
#> [717] "seven" "One" "unchanged" "8"
#> [721] "respectively" "spot" "quietly" "firmer"
#> [725] "3.7495" "98" "dollar" "3.7500"
#> [729] "03" "recovering" "year's" "budget"
#> [733] "projected" "deficit" "5.472" "billion"
#> [737] "riyals" "shortfall" "7.3" "86"
#> [741] "In" "statement" "outlining" "fiscal"
#> [745] "beginning" "Finance" "Abdul" "Aziz"
#> [749] "bin" "Thani" "spend" "12.217"
#> [753] "period" "Projected" "expenditure" "15.6"
#> [757] "revenue" "6.745" "down" "30"
#> [761] "9.7" "failed" "publish" "87"
#> [765] "surrounding" "during" "decided" "limit"
#> [769] "recurrent" "each" "twelfth" "previous"
#> [773] "allocations" "minus" "15" "urged"
#> [777] "heads" "departments" "public" "institutions"
#> [781] "rationalise" "how" "covered" "taken"
#> [785] "order" "relieve" "burden" "placed"
#> [789] "country's" "2.766" "allocated" "projects"
#> [793] "including" "housing" "buildings" "social"
#> [797] "services" "health" "education" "transport"
#> [801] "communications" "electricity" "water" "No"
#> [805] "figure" "revealed" "defence" "security"
#> [809] "There" "projection" "day" "Our"
#> [813] "expectations" "signs" "regarding" "trends"
#> [817] "foremost" "determination" "shoulder" "responsibilites"
#> [821] "protect" "wealth" "helped" "us"
#> [825] "make" "reasonable" "coming" "basis"
#> [829] "assigned" "Arabian" "Hisham" "Nazer"
#> [833] "reiterated" "kingdom's" "commitment" "December's"
#> [837] "accord" "stabilise" "Press" "Agency"
#> [841] "SPA" "Asked" "agency" "free"
#> [845] "Arabia" "fully" "adhering" "Accord"
#> [849] "pronounced" "circumstance" "end" "northern"
#> [853] "hemisphere" "season" "glut" "main"
#> [857] "architect" "7.25" "return" "around"
#> [861] "followed" "turmoil" "saw" "slump"
#> [865] "briefly" "10" "Free" "currently"
#> [869] "just" "16" "Arabia's" "adherence"
#> [873] "shown" "contacts" "showed" "stick"
#> [877] "Jamaica" "Rilwanu" "Lukman" "Nigerian"
#> [881] "aware" "negative" "forces" "manipulate"
#> [885] "operations" "satisfied" "fundamentals" "exist"
#> [889] "stable" "conditions" "Kuwait's" "emirate's"
#> [893] "Al" "fell" "average" "3.5"
#> [897] "3.8" "January" "Ras" "Tanurah"
#> [901] "Ju'aymah" "terminals" "1.9" "2.2"
#> [905] "liftings" "customers" "drop" "rallied"
#> [909] "fourth" "February" "2.5" "third"
#> [913] "figures" "include" "neutral" "zone"
#> [917] "sales" "floating" "storage" "generally"
#> [921] "considered" "part" "Opec" "purposes"
#> [925] "4.133" "restraint" "scheme" "approved"
#> [929] "averaging" "fold" "jump" "appeared"
#> [933] "result" "rushing" "lift" "entitlements"
#> [937] "Last" "week's" "show" "continued"
#> [941] "four" "ex" "partners" "Aramco"
#> [945] "Exxon" "XON" "Mobil" "MOB"
#> [949] "TX" "Chevron" "CHV" "signed"
#> [953] "long" "term" "buy" "17.52"
#> [957] "test" "products" "traditionally" "tapers"
#> [961] "off" "fallen" "refinery" "throughput"
#> [965] "1.1" "cuts" "Yanbu" "Jubail"
#> [969] "refineries" "100,000" "Bahrain's" "remained"
#> [973] "200,000" "Deputy" "ministers" "Bahrain"
#> [977] "discuss" "coordination" "news" "WAM"
#> [981] "reported" "discussing" "implementation" "Sunday's"
#> [985] "Doha" "Cooperation" "Council" "GCC"
#> [989] "Four" "UAE" "Organiaation" "Exporting"
#> [993] "Countries" "face" "stiff" "buyer"
#> [997] "resistance" "stabilize" "newspaper" "none"
#> [1001] "Crude" "sharply" "traders" "estimated"
#> [1005] "port" "Philadelphia" "closed" "Cypriot"
#> [1009] "tanker" "Seapride" "II" "ran"
#> [1013] "aground" "hitting" "200" "foot"
#> [1017] "tower" "supporting" "power" "lines"
#> [1021] "across" "river" "Coast" "Guard"
#> [1025] "spokesman" "spill" "ship" "lodged"
#> [1029] "rocks" "opposite" "Hope" "Creek"
#> [1033] "nuclear" "Jersey" "hoped" "refloat"
#> [1037] "tide" "After" "delivering" "Paulsboro"
#> [1041] "steering" "transmission" "carrying" "Delaware"
#> [1045] "States" "should" "increase" "strategic"
#> [1049] "deal" "impact" "policy" "raise"
#> [1053] "750" "500" "overseas" "embargo"
#> [1057] "rise" "Aspen" "Institute" "Humanistic"
#> [1061] "Studies" "private" "called" "research"
#> [1065] "exploration" "development" "techniques" "predicted"
#> [1069] "years" "20s" "consumption" "instead"
#> [1073] "increasing" "guard" "against" "mitigate"
#> [1077] "risks" "increased" "cited" "basic"
#> [1081] "paths" "fee" "device" "accept"
#> [1085] "full" "benefits" "cheap" "either"
#> [1089] "option" "drawbacks" "Unocal" "Corp's"
#> [1093] "Union" "eastern" "region" "26"
#> [1097] "16.35" "Union's" "Mercantile" "Exchange"
#> [1101] "debut" "procedure" "complex" "use"
#> [1105] "futures" "worldwide" "On" "NYMEX"
#> [1109] "allow" "position" "initiate" "exchange"
#> [1113] "closes" "transaction" "subsequently" "hedged"
#> [1117] "according" "change" "transacted" "Thomas"
#> [1121] "McKiernan" "chairman" "Foreign" "able"
#> [1125] "hedge" "trades" "opens" "negotiate"
#> [1129] "differential" "explained" "expanded" "program"
#> [1133] "serve" "does" "close" "Frank"
#> [1137] "Capozza" "secretary" "Century" "Resources"
#> [1141] "rule" "already" "effect" "platinum"
#> [1145] "open" "interest" "liquidity" "Currently"
#> [1149] "least" "trader" "physical" "EFP"
#> [1153] "entering" "Under" "arrangement" "neither"
#> [1157] "party" "parties" "offset" "When"
#> [1161] "proposed" "Rosemary" "McFadden" "Expansion"
#> [1165] "provision" "add" "globalization" "providing"
#> [1169] "24" "hour" "Commodity" "Futures"
#> [1173] "Trading" "Commission" "CFTC" "Argentine"
#> [1177] "10.8" "12.32" "13.81" "Yacimientos"
#> [1181] "Petroliferos" "Fiscales" "natural" "gas"
#> [1185] "totalled" "1.15" "cubic" "metrers"
#> [1189] "3.6" "1.11" "metres" "produced"