Core text modules


Contains the modules common to the different architectures, and the generic functions to get models from them.


Language models

class LinearDecoder[source]

LinearDecoder(n_out, n_hid, output_p=0.1, tie_encoder=None, bias=True) :: Module

To go on top of an RNNCore module and create a language model.

```python
from fastai.text.models.core import *
from fastai.text.models.awdlstm import *
from fastcore.test import *
import torch
from torch import nn

enc = AWD_LSTM(100, 20, 10, 2)
x = torch.randint(0, 100, (10, 5))
r = enc(x)

tst = LinearDecoder(100, 20, 0.1)
y = tst(r)
test_eq(y[1], r)
test_eq(y[2].shape, r.shape)
test_eq(y[0].shape, [10, 5, 100])

tst = LinearDecoder(100, 20, 0.1, tie_encoder=enc.encoder)
test_eq(tst.decoder.weight, enc.encoder.weight)
```
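As the tests show, the decoder returns a tuple of (decoded logits, raw input, dropped-out input), and tie_encoder shares the output weights with the embedding. A minimal sketch of a head with that behaviour (a simplification with a hypothetical class name and plain nn.Dropout, not the fastai source) could look like this:

```python
# Sketch only: a hypothetical TiedLinearDecoder mirroring the behaviour exercised above.
import torch
from torch import nn

class TiedLinearDecoder(nn.Module):
    def __init__(self, n_out, n_hid, output_p=0.1, tie_encoder=None, bias=True):
        super().__init__()
        self.decoder = nn.Linear(n_hid, n_out, bias=bias)
        self.output_dp = nn.Dropout(output_p)
        if tie_encoder is not None:
            self.decoder.weight = tie_encoder.weight   # share weights with the embedding

    def forward(self, inp):
        dropped = self.output_dp(inp)                  # dropout on the RNN outputs
        return self.decoder(dropped), inp, dropped     # (logits, raw input, dropped-out input)
```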

class SequentialRNN[source]

SequentialRNN(*args:Any) :: Sequential

A sequential module that passes the reset call to its children.

```python
from fastai.torch_core import Module

class _TstMod(Module):
    def reset(self): print('reset')

tst = SequentialRNN(_TstMod(), _TstMod())
test_stdout(tst.reset, 'reset\nreset')
```
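A minimal sketch of such a container (hypothetical name, assuming only standard nn.Sequential behaviour plus a reset hook) could be:

```python
# Sketch: a Sequential container that forwards reset() to every child that defines it.
from torch import nn

class ResettableSequential(nn.Sequential):
    def reset(self):
        for child in self.children():
            if hasattr(child, 'reset'):
                child.reset()
```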

get_language_model[source]

get_language_model(arch, vocab_sz, config=None, drop_mult=1.0)

Create a language model from arch and its config.

The default config used can be found in _model_meta[arch]['config_lm']. drop_mult is a multiplier applied to all the dropout probabilities in that config.
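For instance, the dropout probabilities that drop_mult scales can be inspected directly in that config (the snippet below imports the private _model_meta dict mentioned above, so treat it as an illustration rather than public API):

```python
# Illustrative: peek at the default LM config and the dropout probabilities drop_mult scales.
from fastai.text.models.core import _model_meta
from fastai.text.models.awdlstm import AWD_LSTM

defaults = _model_meta[AWD_LSTM]['config_lm']
print({k: v for k, v in defaults.items() if k.endswith('_p')})
# Building with drop_mult=0.5 halves each of these probabilities before the model is created.
```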

```python
config = awd_lstm_lm_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_language_model(AWD_LSTM, 100, config=config)
x = torch.randint(0, 100, (10, 5))
y = tst(x)
test_eq(y[0].shape, [10, 5, 100])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])
test_eq(tst[1].decoder.weight, tst[0].encoder.weight)

tst = get_language_model(AWD_LSTM, 100, config=config, drop_mult=0.5)
test_eq(tst[1].output_dp.p, config['output_p']*0.5)
for rnn in tst[0].rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].input_dp.p, config['input_p']*0.5)
```
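As a small usage sketch (illustrative only, reusing tst and x from the cells above), the first element of the output holds per-position vocabulary logits, so a greedy next-token prediction can be read off the last position:

```python
# Illustrative only: greedy next-token prediction with the language model built above.
with torch.no_grad():
    logits = tst.eval()(x)[0]                # shape [10, 5, 100]: batch, position, vocab logits
next_tok = logits[:, -1].argmax(dim=-1)      # most likely next token for each sequence, shape [10]
```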

Classification models

class SentenceEncoder[source]

SentenceEncoder(bptt, module, pad_idx=1, max_len=None) :: Module

Create an encoder over module that can process a full sentence.

Warning: This module expects the inputs padded with most of the padding first, with the sequence beginning at a round multiple of bptt (and the rest of the padding at the end). Use pad_input_chunk to get your data in a suitable format.
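For example, with bptt=5 and pad_idx=0, a 7-token sequence padded to length 15 would get the largest multiple of bptt worth of padding in front and the remainder at the end. This is a hand-built illustration of that layout, not a call to pad_input_chunk:

```python
# Hand-built illustration of the expected layout: padding first, sequence starting at a
# multiple of bptt, leftover padding at the end.
import torch
seq    = torch.randint(1, 5, (7,))           # 7 real tokens
front  = torch.zeros(5, dtype=torch.long)    # 5 pads: the largest multiple of bptt that fits
back   = torch.zeros(3, dtype=torch.long)    # remaining 3 pads go at the end
padded = torch.cat([front, seq, back])       # length 15, real tokens start at index 5
```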

```python
mod = nn.Embedding(5, 10)
tst = SentenceEncoder(5, mod, pad_idx=0)
x = torch.randint(1, 5, (3, 15))
x[2,:5] = 0
out,mask = tst(x)

test_eq(out[:1], mod(x)[:1])
test_eq(out[2,5:], mod(x)[2,5:])
test_eq(mask, x==0)
```

masked_concat_pool[source]

masked_concat_pool(output, mask, bptt)

Pool MultiBatchEncoder outputs into one vector [last_hidden, max_pool, avg_pool]

```python
out = torch.randn(2, 4, 5)
mask = torch.tensor([[True,True,False,False], [False,False,False,True]])
x = masked_concat_pool(out, mask, 2)

test_close(x[0,:5], out[0,-1])
test_close(x[1,:5], out[1,-2])
test_close(x[0,5:10], out[0,2:].max(dim=0)[0])
test_close(x[1,5:10], out[1,:3].max(dim=0)[0])
test_close(x[0,10:], out[0,2:].mean(dim=0))
test_close(x[1,10:], out[1,:3].mean(dim=0))

out1 = torch.randn(2, 4, 5)
out1[0,2:] = out[0,2:].clone()
out1[1,:3] = out[1,:3].clone()
x1 = masked_concat_pool(out1, mask, 2)
test_eq(x, x1)
```
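The tests above pin down the semantics: padded positions (mask == True) are ignored, the last non-padded step supplies the "last hidden" part, and the max and average pools run over the real tokens only. A simplified reference sketch consistent with those tests (not the library implementation) is:

```python
# Simplified sketch of masked concat pooling, consistent with the tests above.
import torch

def masked_concat_pool_sketch(output, mask, bptt):
    # output: [bs, seq_len, n_hid]; mask: [bs, seq_len], True where the token is padding.
    lens = (~mask).sum(dim=1)                                         # real sequence lengths
    avg  = output.masked_fill(mask[:, :, None], 0).sum(dim=1) / lens[:, None]
    mx   = output.masked_fill(mask[:, :, None], float('-inf')).max(dim=1)[0]
    pad_in_last = mask[:, -bptt:].sum(dim=1)                          # padding inside the last chunk
    last = output[torch.arange(output.size(0)), -pad_in_last - 1]     # last non-padded hidden state
    return torch.cat([last, mx, avg], dim=1)                          # [bs, 3*n_hid]
```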

class PoolingLinearClassifier[source]

PoolingLinearClassifier(dims, ps, bptt, y_range=None) :: Module

Create a linear classifier with pooling

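A short usage sketch with illustrative values follows. It assumes, consistent with the tests elsewhere on this page, that the head consumes the (output, mask) pair produced by SentenceEncoder, and that dims must start at three times the encoder's hidden size because of the three concatenated pools:

```python
# Illustrative: an embedding "encoder" wrapped in SentenceEncoder, then a pooling classifier head.
mod = nn.Embedding(5, 10)
enc = SentenceEncoder(5, mod, pad_idx=0)
clf = PoolingLinearClassifier([30, 50, 2], ps=[0.1, 0.1], bptt=5)   # 30 = 3 * 10 pooled features

x = torch.randint(1, 5, (3, 15))
x[2, :5] = 0
preds, *_ = clf(enc(x))
test_eq(preds.shape, [3, 2])
```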

get_text_classifier[source]

get_text_classifier(arch, vocab_sz, n_class, seq_len=72, config=None, drop_mult=1.0, lin_ftrs=None, ps=None, pad_idx=1, max_len=1440, y_range=None)

Create a text classifier from arch and its config, maybe pretrained

```python
config = awd_lstm_clas_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_text_classifier(AWD_LSTM, 100, 3, config=config)
x = torch.randint(2, 100, (10, 5))
y = tst(x)
test_eq(y[0].shape, [10, 3])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])

tst.eval()
y = tst(x)
x1 = torch.cat([x, torch.tensor([2,1,1,1,1,1,1,1,1,1])[:,None]], dim=1)
y1 = tst(x1)
test_close(y[0][1:], y1[0][1:])

tst = get_text_classifier(AWD_LSTM, 100, 3, config=config, drop_mult=0.5)
test_eq(tst[1].layers[1][1].p, 0.1)
test_eq(tst[1].layers[0][1].p, config['output_p']*0.5)
for rnn in tst[0].module.rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].module.hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].module.encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].module.input_dp.p, config['input_p']*0.5)
```
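As with the language model, the first element of the output carries the task logits, here one row of 3 class scores per sequence. A small illustrative read-out, reusing tst and x from the cells above:

```python
# Illustrative: class predictions from the classifier built above.
with torch.no_grad():
    class_logits = tst.eval()(x)[0]        # shape [10, 3]
preds = class_logits.argmax(dim=1)         # one predicted class index per sequence
```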
