Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Create a tokenizer in Ruby. Your tokenizer will take two command line arguments:

ID: 3877476 • Letter: C

Question

Create a tokenizer in Ruby.

Your tokenizer will take two command line arguments: The first will be a grammar specification and the second will be a file to tokenize. Tokenize the file and print the tokens (symbol, lexeme, line) to the screen. If the file cannot be tokenized, print an error message identifying the line with the error.

sample files

File 1:

NUM -> \d+
ADDOP -> [-+]
MULOP -> [*/]
LP -> \(
RP -> \)
EQ -> =
ID -> [A-Z]\w*
comment -> {[^}]*}

S -> ID EQ expr
expr -> expr ADDOP term | term
term -> term MULOP factor | factor
factor -> ID | NUM | LP expr RP

File 2:

4+2 { this is
a comment }
+ 6

File 3:

1 +

2

Explanation / Answer

Tokenizer in Ruby:

require_relative 'spec_helper'


# Specs for Tokenizer. The examples below expect Tokenizer#get to turn a
# whitespace-separated string into an array of numeric token indexes, and
# Tokenizer#tokens to expose the Tokens object used for that mapping.
describe Tokenizer do
  let(:tokenizer) { described_class.new }
  let(:offset) { 1 } # default token offset

  describe '#get' do
    it 'does tokenization' do
      # Two distinct words are expected to yield two consecutive indexes.
      expect(tokenizer.get('foo bar')).to eq([offset, offset + 1])
    end

    it 'ignores too short tokens' do
      t = described_class.new(min_length: 2)
      expect(t.get('x')).to eq []
    end

    it 'ignores stop words' do
      t = described_class.new(stop_words: ['xyz'])
      # 'xyz' is skipped, so 'foo' is the first token to receive an index.
      expect(t.get('xyz foo')).to eq [offset]
    end

    it 'does not return nil tokens' do
      # Register 'foo' and then freeze the token store; 'bar' is new at that
      # point, and the result is expected to omit it rather than contain nil.
      tokenizer.tokens.get('foo')
      tokenizer.tokens.freeze!
      expect(tokenizer.get('foo bar')).to eq [offset]
    end
  end

  describe '#tokens' do
    it 'returns a tokens object by default' do
      expect(tokenizer.tokens).to be_a Tokens
    end

    it 'can be overridden' do
      tokens = Tokens.new
      # NOTE(review): the Tokens instance is passed positionally while the
      # other options above are keywords -- confirm against Tokenizer#initialize.
      t = described_class.new(tokens)
      expect(t.tokens).to be tokens
    end
  end
end

..

tokens_spec.rb

require_relative 'spec_helper'
require 'tempfile'


# Specs for Tokens. The examples below expect Tokens#get to hand out
# consecutive integer indexes (starting at `offset`) for each new token string
# and to return the same index for a token it has already seen.
describe Tokens do
  let(:tokens) { described_class.new }
  let(:offset) { 1 } # default offset

  describe '#get' do
    it 'can new tokens' do
      expect(tokens.get('bar')).to eq offset
      expect(tokens.get('foo')).to eq(offset + 1)
    end

    it 'can get an existing token' do
      tokens.get('bar')
      expect(tokens.get('bar')).to eq offset
    end

    it 'can include a prefix' do
      # Looking up the already-prefixed string is expected to hit the same
      # entry that was created via the prefix: option.
      tokens.get('bar', prefix: 'XyZ$')
      expect(tokens.get('XyZ$bar')).to eq offset
    end

    it 'can get an existing token when frozen' do
      tokens.get('blup')
      tokens.freeze!
      expect(tokens.get('blup')).to eq offset
    end

    it 'cannot get a new token when frozen' do
      tokens.get('blup')
      tokens.freeze!
      expect(tokens.get('blabla')).to be_nil
    end
  end

  describe '#find' do
    it 'can find an existing token' do
      tokens.get('blup')
      i = tokens.get('blah')
      expect(tokens.find(i)).to eq 'blah'
    end

    it 'returns nil for a non-existing token' do
      # Only one token has been registered, so offset + 1 is unassigned.
      tokens.get('blup')
      expect(tokens.find(offset + 1)).to be_nil
    end

    it 'removes the prefix' do
      i = tokens.get('blup', prefix: 'FOO$')
      expect(tokens.find(i, prefix: 'FOO$')).to eq 'blup'
    end
  end

  describe '#indexes' do
    it 'is empty without tokens' do
      expect(tokens.indexes).to eq []
    end

    it 'returns the expected indexes' do
      tokens.get('foo')
      tokens.get('blup')
      expect(tokens.indexes).to eq [offset, offset + 1]
    end
  end

  describe '#offset' do
    it 'has a default' do
      expect(described_class.new.offset).to eq offset
    end

    it 'can override the default' do
      expect(described_class.new(offset: 5).offset).to eq 5
    end

    it 'affects the first number' do
      tokens = described_class.new(offset: 12)
      expect(tokens.get('hi')).to eq 12
    end
  end

  describe '#frozen?' do
    it 'is not frozen by default' do
      expect(tokens.frozen?).to be false
    end

    it 'can be frozen' do
      tokens.freeze!
      expect(tokens.frozen?).to be true
    end

    it 'can be thawed' do
      tokens.freeze!
      tokens.thaw!
      expect(tokens.frozen?).to be false
    end
  end

  describe '#limit!' do
    it 'limits to most frequent tokens by max_size' do
      # 'blup' (index offset + 1) is requested twice and 'foo' once, so
      # 'blup' is the single entry expected to survive.
      tokens.get('foo')
      tokens.get('blup')
      tokens.get('blup')
      tokens.limit!(max_size: 1)
      expect(tokens.indexes).to eq [offset + 1]
    end

    it 'limits by min_occurence' do
      # 'foo' (index offset) is requested twice and 'blup' once, so only
      # 'foo' meets the threshold of 2.
      tokens.get('foo')
      tokens.get('blup')
      tokens.get('foo')
      tokens.limit!(min_occurence: 2)
      expect(tokens.indexes).to eq [offset]
    end
  end

  describe '#load' do
    let(:file) { Tempfile.new('tokens') }
    # close! both closes the handle and unlinks the file, so the descriptor
    # is released as well as the path.
    after { file.close! }

    it 'saves and loads tokens' do
      tokens.get('foo')
      tokens.get('bar')
      tokens.save(file.path)
      # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
      expect(File.exist?(file.path)).to be true
      expect(File.zero?(file.path)).to be false

      # Assert against the freshly loaded instance; checking the original
      # `tokens` would pass even if save/load did nothing.
      ntokens = described_class.new
      ntokens.load(file.path)
      expect(ntokens.get('bar')).to eq(offset + 1)
    end
  end
end

.

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote