Words Frequencies

By gennarino on Sep 05, 2019

Word frequencies using a hash table!
I wrote this script to learn how hash tables work.
Copy the source into your script editor and you will get a new menu entry called Words.
The Top10 and Bottom10 aliases were taken from the official mIRC documentation.

;
; Collects words and their frequencies. Last modified: January 19, 2021
; 
;                    By gennarino
;
; Creates Hash table Words => Frequencies
;

On *:Start:{
  if (!$hget(Words)) { hmake Words 100 }
  if ($isfile($scriptdir/Words.hsh)) { hload  Words $scriptdir/Words.hsh }
}

On *:Exit: { if ($hget(Words)) { hsave Words $scriptdir/Words.hsh } }
On *:Disconnect: { if ($hget(Words)) { hsave Words $scriptdir/Words.hsh } }

;End Hash Table handler

on *:TEXT:*:#: {
  if ( http isin $1- ) halt   ; skips sentences containing links
  ;Removes all control codes (bold/underline/italics/color/reverse)
  ;echo 4 -a $1-
  var %sentence = $strip($1-,buricmo)
  ;echo 4 -a %sentence 
  %sentence = $replace(%sentence,#,$chr(32),?,$chr(32),.,$chr(32),!,$chr(32),",$chr(32),',$chr(32),$chr(44),$chr(32),:,$chr(32),$chr(40),$chr(32),$chr(41),$chr(32),/,$chr(32))
  ; echo 4 -a { %sentence }
  var %Lengn = 0
  var %k = 1
  var %aw = $NULL
  set %single $NULL
  tokenize 32 %sentence
  while (  %k <= $0 ) { 
    %aw  = $ [ $+ [ %k ] ]
    %Lengn = $len(%aw)
    if (%Lengn > 3) {           ; Accept only words greater than 3
      if ($hget(Words,%aw)) { 
        hinc Words %aw 1 
      }
      else { 
        hadd Words %aw 1  
        %single = %aw
        ;       echo 8 -a  %single 
      }
    }
    inc %k
  }
}

menu * {

  ♡  --- Words --> 
  .-
  .Save Hash Table: .hsave Words $scriptdir/Words.hsh
  .Save Ini File: .hsave -i Words $scriptdir/Words.ini
  .Show Top10: .top10
  .Show Bottom10: .bottom10
  .Show LastUnknown: //echo -a Last single word used:  %single
  .Prune lowest: Wprune $?="Enter Upper limit"
  .Search Word: Wget $?="Enter word: "

  ; .Print All: .print_All_Words  ; Warning: could hang MIRC
  .-
}

Alias print_All_Words {
  var %i = 1
  echo -a Words Table:
  ; iterate over each item
  while ($hget(Words, %i).item) {
    echo -a %i $+ ) $v1 => $hget(Words, $v1)
    inc %i
  }
}

alias top10 {
  hsave -no Words top10.txt
  filter -ffcteun 1 32 top10.txt top10.txt
  var %i = 1 
  while (%i <= 10) {
    var %top10.item = $hget(Words,$gettok($read(top10.txt,nt,%i),1,32)).item
    set %top10 %top10 %top10.item  ( $+ $hget(Words,%top10.item) $+ )
    inc %i
  }
  echo -a TOP 10: $replace(%top10,$chr(32),$+($chr(44),$chr(32)))
  unset %top10
}

alias bottom10 {
  hsave -no Words bottom10.txt
  filter -ffctun 1 32 bottom10.txt bottom10.txt
  var %i = 1 
  while (%i <= 10) {
    var %bottom10.item = $hget(Words,$gettok($read(bottom10.txt,nt,%i),1,32)).item
    set %bottom10 %bottom10 %bottom10.item ( $+ $hget(Words,%bottom10.item) $+ )
    inc %i
  }
  echo -a BOTTOM 10: $replace(%bottom10,$chr(32),$+($chr(44),$chr(32)))
  unset %bottom10
}
alias wdel hdel Words $$1 | echo 4 -a $$1 => deleted
alias wget if ($hget(Words,$$1)) //say The word 4 $$1 has been used 4 $hget(Words, $1) times!

alias Wprune {
  if ( $1 ) var %q = $1
  else var %q = 2
  var %c = 1
  var %p = 0
  var %key = $hget(Words, %c).item
  var %data = $hget(Words, %c).data
  echo -a You have $hget(Words, 0).item items in your hash table
  while ( %key ) {
    if ( %data < %q ) {
      hdel Words %key 
      inc %p 1
    }
    inc %c 1
    %key = $hget(Words, %c).item
    %data = $hget(Words, %c).data
  }
  echo 4 -a Pruned %p entries
  hsave Words $scriptdir/Words.hsh 
}

That's all folks ....

Comments

Sign in to comment.
Are you sure you want to unfollow this person?
Are you sure you want to delete this?
Click "Unsubscribe" to stop receiving notices pertaining to this post.
Click "Subscribe" to resume notices pertaining to this post.