from APIsearch import search, get_capture_groups, top_n
from collo_measures import cca, dca, rank_collo
# --- (object, verb) co-occurrence in the 把 construction -------------------
# CQL: the token 把 tagged "P", then any run of tokens whose pos is not a
# noun (N[abcd]...), COMMACATEGORY or PERIODCATEGORY, then a noun labelled
# `obj` followed by a verb labelled `v`.
cql = '[word="把" & pos="P"] [pos!="N[abcd].*|COMMACATEGORY|PERIODCATEGORY"]* obj:[pos="N[abcd].*"] v:[pos="V.*"]'
search_results, requested_urls = search(
    cql,
    board="Boy-Girl",
    year_from=2019,
    year_to=2019,
    number=None,  # NOTE(review): presumably "no cap on hits" -- confirm in APIsearch
)
len(search_results)

# Count how many hits share each (object, verb) pair.
freq_table = {}
for hit in search_results:
    gramrel = get_capture_groups(hit)
    # Only the first entry of each labelled capture group is used.
    Obj = gramrel['obj'][0]
    Act = gramrel['v'][0]
    k = (Obj, Act)
    freq_table[k] = freq_table.get(k, 0) + 1
len(freq_table)
top_n(freq_table, 15)

# NOTE(review): `cca` presumably computes association scores for the pairs
# (the results are ranked by 'G2' and 'fisher_exact' below) -- confirm in
# collo_measures.
cca_results = cca(freq_table)
rank_collo(cca_results, sort_by='G2', freq_cutoff=3)[:15]
rank_collo(cca_results, sort_by='fisher_exact', freq_cutoff=3)[:15]
# --- Verbs in 把 vs. 將 ------------------------------------------------------
# Same pattern as a 把-only query, but the first token may be 將 or 把 and is
# labelled `construction` so each hit can be assigned to one of the two.
cql = 'construction:[word="將|把" & pos="P"] [pos!="N[abcd].*|COMMACATEGORY|PERIODCATEGORY"]* obj:[pos="N[abcd].*"] v:[pos="V.*"]'
search_results, requested_urls = search(
    cql,
    board="Boy-Girl",
    year_from=2019,
    year_to=2019,
    number=None,  # NOTE(review): presumably "no cap on hits" -- confirm in APIsearch
)
len(search_results)

# One verb -> count table per construction. A `construction` value other
# than the two keys below raises KeyError.
freq_table = {'把': {}, '將': {}}
for hit in search_results:
    gramrel = get_capture_groups(hit)
    # Only the first entry of each labelled capture group is used.
    Type = gramrel['construction'][0]
    Obj = gramrel['obj'][0]  # looked up but not used in the verb tally
    Act = gramrel['v'][0]
    verb_counts = freq_table[Type]
    verb_counts[Act] = verb_counts.get(Act, 0) + 1

# NOTE(review): `dca` presumably scores how strongly each verb is associated
# with one construction over the other -- confirm in collo_measures.
dca_results = dca(freq_table)
# First ten entries of the G2 ranking ...
rank_collo(dca_results, sort_by='G2', freq_cutoff=3)[:10]
# ... and the last ten, listed from the end of the ranking backwards.
rank_collo(dca_results, sort_by='G2', freq_cutoff=3)[-1:-11:-1]
# --- Objects in 把 vs. 將 ----------------------------------------------------
# Iterates the `search_results` already in scope (hits labelled with
# `construction`, `obj` and `v`) and tallies the object noun per construction.
# A `construction` value other than the two keys below raises KeyError.
freq_table = {'把': {}, '將': {}}
for hit in search_results:
    gramrel = get_capture_groups(hit)
    # Only the first entry of each labelled capture group is used.
    Type = gramrel['construction'][0]
    Obj = gramrel['obj'][0]
    Act = gramrel['v'][0]  # looked up but not used in the object tally
    obj_counts = freq_table[Type]
    obj_counts[Obj] = obj_counts.get(Obj, 0) + 1

# NOTE(review): `dca` presumably scores how strongly each object is
# associated with one construction over the other -- confirm in
# collo_measures.
dca_results = dca(freq_table)
# First ten entries of the G2 ranking ...
rank_collo(dca_results, sort_by='G2', freq_cutoff=3)[:10]
# ... and the last ten, listed from the end of the ranking backwards.
rank_collo(dca_results, sort_by='G2', freq_cutoff=3)[-1:-11:-1]