LDA++
|
#include <LDABuilder.hpp>
Public Member Functions | |
LDABuilder () | |
LDABuilder & | set_iterations (size_t iterations) |
LDABuilder & | set_workers (size_t workers) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_classic_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_classic_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar compute_likelihood=1.0, int random_state=0) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, size_t fixed_point_iterations=10, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, size_t fixed_point_iterations=10, Scalar compute_likelihood=1.0, int random_state=0) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_fast_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar C=1, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_fast_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar C=1, Scalar compute_likelihood=1.0, int random_state=0) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_semi_supervised_e_step (std::shared_ptr< em::EStepInterface< Scalar > > supervised_step=nullptr, std::shared_ptr< em::EStepInterface< Scalar > > unsupervised_step=nullptr) |
LDABuilder & | set_semi_supervised_e_step (std::shared_ptr< em::EStepInterface< Scalar > > supervised_step=nullptr, std::shared_ptr< em::EStepInterface< Scalar > > unsupervised_step=nullptr) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_multinomial_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar mu=2, Scalar eta_weight=1, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_multinomial_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar mu=2, Scalar eta_weight=1, Scalar compute_likelihood=1.0, int random_state=0) |
std::shared_ptr< em::EStepInterface< Scalar > > | get_correspondence_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar mu=2, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_correspondence_supervised_e_step (size_t e_step_iterations=10, Scalar e_step_tolerance=1e-2, Scalar mu=2, Scalar compute_likelihood=1.0, int random_state=0) |
LDABuilder & | set_e (std::shared_ptr< em::EStepInterface< Scalar > > e_step) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_classic_m_step () |
LDABuilder & | set_classic_m_step () |
std::shared_ptr< em::MStepInterface< Scalar > > | get_fast_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
LDABuilder & | set_fast_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
LDABuilder & | set_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_fast_supervised_online_m_step (size_t num_classes, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
LDABuilder & | set_fast_supervised_online_m_step (size_t num_classes, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_fast_supervised_online_m_step (std::vector< Scalar > class_weights, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
LDABuilder & | set_fast_supervised_online_m_step (std::vector< Scalar > class_weights, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_fast_supervised_online_m_step (Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > class_weights, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
LDABuilder & | set_fast_supervised_online_m_step (Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > class_weights, Scalar regularization_penalty=1e-2, size_t minibatch_size=128, Scalar eta_momentum=0.9, Scalar eta_learning_rate=0.01, Scalar beta_weight=0.9) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_semi_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
LDABuilder & | set_semi_supervised_m_step (size_t m_step_iterations=10, Scalar m_step_tolerance=1e-2, Scalar regularization_penalty=1e-2) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_multinomial_supervised_m_step (Scalar mu=2.) |
LDABuilder & | set_multinomial_supervised_m_step (Scalar mu=2.) |
std::shared_ptr< em::MStepInterface< Scalar > > | get_correspondence_supervised_m_step (Scalar mu=2.) |
LDABuilder & | set_correspondence_supervised_m_step (Scalar mu=2.) |
LDABuilder & | set_m (std::shared_ptr< em::MStepInterface< Scalar > > m_step) |
LDABuilder & | initialize_topics_seeded (const Eigen::MatrixXi &X, size_t topics, size_t N=30, int random_state=0) |
LDABuilder & | initialize_topics_seeded (std::shared_ptr< corpus::Corpus > corpus, size_t topics, size_t N=30, int random_state=0) |
LDABuilder & | initialize_topics_random (size_t words, size_t topics, int random_state=0) |
LDABuilder & | initialize_topics_from_model (std::shared_ptr< parameters::ModelParameters< Scalar > > model) |
LDABuilder & | initialize_eta_zeros (size_t num_classes) |
LDABuilder & | initialize_eta_uniform (size_t num_classes) |
LDABuilder & | initialize_eta_from_model (std::shared_ptr< parameters::SupervisedModelParameters< Scalar > > model) |
virtual | operator LDA< Scalar > () const override |
The LDABuilder provides a simpler interface to build an LDA.
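The builder has the following three main responsibilities:
1. Create (and set) an expectation step, see the get_*_e_step() and set_*_e_step() methods.
2. Create (and set) a maximization step, see the get_*_m_step() and set_*_m_step() methods.
3. Initialize the model parameters, see the initialize_*() methods.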
Examples:
LDA<double> lda = LDABuilder<double>(). initialize_topics_seeded(X, 100);
LDA<double> lda = LDABuilder<double>(). set_iterations(20). set_classic_e_step(). set_supervised_m_step(). initialize_topics_seeded(X, 100). initialize_eta_zeros(y.maxCoeff() + 1);
LDA<double> lda = LDABuilder<double>(). set_classic_e_step(50, 1e-2). set_supervised_m_step(). initialize_topics_from_model(model). initialize_eta_from_model(model);
ldaplusplus::LDABuilder< Scalar >::LDABuilder | ( | ) |
Create a default builder that will create a simple unsupervised LDA.
The default builder uses unsupervised expectation and maximization steps with 20 iterations and as many workers as there are CPUs available.
Before being usable the model parameters must be initialized to set the number of topics etc.
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_classic_e_step | ( | size_t | e_step_iterations = 10 , |
Scalar | e_step_tolerance = 1e-2 , |
||
Scalar | compute_likelihood = 1.0 , |
||
int | random_state = 0 |
||
) |
Create an UnsupervisedEStep.
You can also see a description of the parameters at UnsupervisedEStep::UnsupervisedEStep
e_step_iterations | The max number of times to alternate between maximizing for \(\gamma\) and for \(\phi\). |
e_step_tolerance | The minimum relative change in the variational parameter \(\gamma\). |
compute_likelihood | The percentage of documents to compute likelihood for (1.0 means compute for every document) |
random_state | An initial seed value for any random numbers needed |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_classic_m_step | ( | ) |
Create an UnsupervisedMStep.
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_correspondence_supervised_e_step | ( | size_t | e_step_iterations = 10 , |
Scalar | e_step_tolerance = 1e-2 , |
||
Scalar | mu = 2 , |
||
Scalar | compute_likelihood = 1.0 , |
||
int | random_state = 0 |
||
) |
Create a CorrespondenceSupervisedEStep.
You can also see a description of the parameters at CorrespondenceSupervisedEStep::CorrespondenceSupervisedEStep.
e_step_iterations | The maximum iterations for each document's expectation step |
e_step_tolerance | The minimum relative change in the ELBO (less than that and we stop iterating) |
mu | A uniform Dirichlet prior for the supervised parameters (default: 2) |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_correspondence_supervised_m_step | ( | Scalar | mu = 2. | ) |
Create a CorrespondenceSupervisedMStep.
You can also see a description of the parameters at CorrespondenceSupervisedMStep::CorrespondenceSupervisedMStep.
mu | A uniform Dirichlet prior for the supervised parameters |
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_fast_supervised_e_step | ( | size_t | e_step_iterations = 10 , |
Scalar | e_step_tolerance = 1e-2 , |
||
Scalar | C = 1 , |
||
Scalar | compute_likelihood = 1.0 , |
||
int | random_state = 0 |
||
) |
Create a FastSupervisedEStep.
You can also see a description of the parameters at FastSupervisedEStep::FastSupervisedEStep.
e_step_iterations | The maximum iterations for each document's expectation step |
e_step_tolerance | The minimum relative change in the ELBO (less than that and we stop iterating) |
C | Weight of the supervised part in the inference (default: 1) |
compute_likelihood | The percentage of documents to compute likelihood for (1.0 means compute for every document) |
random_state | An initial seed value for any random numbers needed |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_fast_supervised_m_step | ( | size_t | m_step_iterations = 10 , |
Scalar | m_step_tolerance = 1e-2 , |
||
Scalar | regularization_penalty = 1e-2 |
||
) |
Create a FastSupervisedMStep.
You can also see a description of the parameters at FastSupervisedMStep::FastSupervisedMStep.
m_step_iterations | The maximum number of gradient descent iterations |
m_step_tolerance | The minimum relative improvement in the log likelihood between consecutive gradient descent iterations |
regularization_penalty | The L2 penalty for logistic regression |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_fast_supervised_online_m_step | ( | size_t | num_classes, |
Scalar | regularization_penalty = 1e-2 , |
||
size_t | minibatch_size = 128 , |
||
Scalar | eta_momentum = 0.9 , |
||
Scalar | eta_learning_rate = 0.01 , |
||
Scalar | beta_weight = 0.9 |
||
) |
Create a FastOnlineSupervisedMStep without specifying class weights.
You can also see a description of the parameters at FastOnlineSupervisedMStep::FastOnlineSupervisedMStep.
num_classes | The number of classes |
regularization_penalty | The L2 penalty for the logistic regression |
minibatch_size | After that many documents call m_step() |
eta_momentum | The momentum for the SGD update of \(\eta\) |
eta_learning_rate | The learning rate for the SGD update of \(\eta\) |
beta_weight | The weight for the online update of \(\beta\) |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_fast_supervised_online_m_step | ( | std::vector< Scalar > | class_weights, |
Scalar | regularization_penalty = 1e-2 , |
||
size_t | minibatch_size = 128 , |
||
Scalar | eta_momentum = 0.9 , |
||
Scalar | eta_learning_rate = 0.01 , |
||
Scalar | beta_weight = 0.9 |
||
) |
Create a FastOnlineSupervisedMStep with class weights given as a std::vector.
You can also see a description of the parameters at FastOnlineSupervisedMStep::FastOnlineSupervisedMStep.
class_weights | Weights to account for class imbalance |
regularization_penalty | The L2 penalty for the logistic regression |
minibatch_size | After that many documents call m_step() |
eta_momentum | The momentum for the SGD update of \(\eta\) |
eta_learning_rate | The learning rate for the SGD update of \(\eta\) |
beta_weight | The weight for the online update of \(\beta\) |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_fast_supervised_online_m_step | ( | Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > | class_weights, |
Scalar | regularization_penalty = 1e-2 , |
||
size_t | minibatch_size = 128 , |
||
Scalar | eta_momentum = 0.9 , |
||
Scalar | eta_learning_rate = 0.01 , |
||
Scalar | beta_weight = 0.9 |
||
) |
Create a FastOnlineSupervisedMStep with class weights given as an Eigen column vector.
You can also see a description of the parameters at FastOnlineSupervisedMStep::FastOnlineSupervisedMStep.
class_weights | Weights to account for class imbalance |
regularization_penalty | The L2 penalty for the logistic regression |
minibatch_size | After that many documents call m_step() |
eta_momentum | The momentum for the SGD update of \(\eta\) |
eta_learning_rate | The learning rate for the SGD update of \(\eta\) |
beta_weight | The weight for the online update of \(\beta\) |
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_multinomial_supervised_e_step | ( | size_t | e_step_iterations = 10 , |
Scalar | e_step_tolerance = 1e-2 , |
||
Scalar | mu = 2 , |
||
Scalar | eta_weight = 1 , |
||
Scalar | compute_likelihood = 1.0 , |
||
int | random_state = 0 |
||
) |
Create a MultinomialSupervisedEStep.
You can also see a description of the parameters at MultinomialSupervisedEStep::MultinomialSupervisedEStep.
e_step_iterations | The max number of times to alternate between maximizing for \(\gamma\) and for \(\phi\). |
e_step_tolerance | The minimum relative change in the variational parameter \(\gamma\). |
mu | The uniform Dirichlet prior of \(\eta\); in practice it acts as a smoothing parameter during the maximization of \(\eta\). |
eta_weight | A weighting parameter that either increases or decreases the influence of the supervised part. |
compute_likelihood | The percentage of documents to compute likelihood for (1.0 means compute for every document) |
random_state | An initial seed value for any random numbers needed |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_multinomial_supervised_m_step | ( | Scalar | mu = 2. | ) |
Create a MultinomialSupervisedMStep.
You can also see a description of the parameters at MultinomialSupervisedMStep::MultinomialSupervisedMStep.
mu | A uniform Dirichlet prior for the supervised parameters |
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_semi_supervised_e_step | ( | std::shared_ptr< em::EStepInterface< Scalar > > | supervised_step = nullptr , |
std::shared_ptr< em::EStepInterface< Scalar > > | unsupervised_step = nullptr |
||
) |
Create a SemiSupervisedEStep.
You can also see a description of the parameters at SemiSupervisedEStep::SemiSupervisedEStep.
supervised_step | The supervised step to use (when nullptr is provided it defaults to FastSupervisedEStep with default parameters) |
unsupervised_step | The unsupervised step to use (when nullptr is provided it defaults to FastUnsupervisedEStep with default parameters) |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_semi_supervised_m_step | ( | size_t | m_step_iterations = 10 , |
Scalar | m_step_tolerance = 1e-2 , |
||
Scalar | regularization_penalty = 1e-2 |
||
) |
Create a SemiSupervisedMStep.
You can also see a description of the parameters at SemiSupervisedMStep::SemiSupervisedMStep.
m_step_iterations | The maximum number of gradient descent iterations |
m_step_tolerance | The minimum relative improvement in the log likelihood between consecutive gradient descent iterations |
regularization_penalty | The L2 penalty for logistic regression |
std::shared_ptr< em::EStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_supervised_e_step | ( | size_t | e_step_iterations = 10 , |
Scalar | e_step_tolerance = 1e-2 , |
||
size_t | fixed_point_iterations = 10 , |
||
Scalar | compute_likelihood = 1.0 , |
||
int | random_state = 0 |
||
) |
Create a SupervisedEStep.
You can also see a description of the parameters at SupervisedEStep::SupervisedEStep.
e_step_iterations | The maximum iterations for each document's expectation step |
e_step_tolerance | The minimum relative change in the ELBO (less than that and we stop iterating) |
fixed_point_iterations | The number of fixed point iterations (see SupervisedEStep) |
std::shared_ptr< em::MStepInterface< Scalar > > ldaplusplus::LDABuilder< Scalar >::get_supervised_m_step | ( | size_t | m_step_iterations = 10 , |
Scalar | m_step_tolerance = 1e-2 , |
||
Scalar | regularization_penalty = 1e-2 |
||
) |
Create a SupervisedMStep.
You can also see a description of the parameters at SupervisedMStep::SupervisedMStep.
m_step_iterations | The maximum number of gradient descent iterations |
m_step_tolerance | The minimum relative improvement in the log likelihood between consecutive gradient descent iterations |
regularization_penalty | The L2 penalty for logistic regression |
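LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_eta_from_model | ( | std::shared_ptr< parameters::SupervisedModelParameters< Scalar > > | model | ) |
inline |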
Initialize the supervised model parameters from another model.
In practice copy \(\eta\) from the passed in model to a local copy.
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_eta_uniform | ( | size_t | num_classes | ) |
Initialize the supervised model parameters which generate the class label with a uniform multinomial distribution.
num_classes | The number of classes |
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_eta_zeros | ( | size_t | num_classes | ) |
Initialize the supervised model parameters which generate the class label with zeros.
num_classes | The number of classes |
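LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_topics_from_model | ( | std::shared_ptr< parameters::ModelParameters< Scalar > > | model | ) |
inline |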
Initialize the topic distributions from another model.
In practice copy \(\beta\) and \(\alpha\) from the passed in model to a local copy.
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_topics_random | ( | size_t | words, |
size_t | topics, | ||
int | random_state = 0 |
||
) |
Initialize the topics over words distributions as random distributions.
This initialization also initializes alpha as 1.0 / topics
words | The number of distinct words in the vocabulary |
topics | The number of topics |
random_state | The initial state of the random number generator |
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_topics_seeded | ( | const Eigen::MatrixXi & | X, |
size_t | topics, | ||
size_t | N = 30 , |
||
int | random_state = 0 |
||
) |
Initialize the topic over words distributions by seeding them from the passed in documents.
This initialization also initializes alpha as 1.0 / topics
X | The word counts for each document |
topics | The number of topics |
N | The number of documents to use for seeding |
random_state | The initial state of the random number generator |
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::initialize_topics_seeded | ( | std::shared_ptr< corpus::Corpus > | corpus, |
size_t | topics, | ||
size_t | N = 30 , |
||
int | random_state = 0 |
||
) |
Initialize the topic over words distributions by seeding them from the passed in documents.
This initialization also initializes alpha as 1.0 / topics
corpus | The word counts for each document |
topics | The number of topics |
N | The number of documents to use for seeding |
random_state | The initial state of the random number generator |
virtual ldaplusplus::LDABuilder< Scalar >::operator LDA< Scalar > | ( | ) | const |
inline override virtual |
Build a brand new LDA instance from the configuration of the builder.
Before returning, it checks for configuration problems that would result in an unusable LDA instance and throws a runtime_error if any are found.
Implements ldaplusplus::LDABuilderInterface< Scalar >.
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
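LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::set_e | ( | std::shared_ptr< em::EStepInterface< Scalar > > | e_step | ) |
inline |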
Set an expectation step.
This is meant to be used with all the methods get_*_e_step() as in the following example.
auto builder = LDABuilder<double>(); builder.set_e(builder.get_classic_e_step());
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::set_iterations | ( | size_t | iterations | ) |
Choose the number of iterations (see LDA::fit).
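LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::set_m | ( | std::shared_ptr< em::MStepInterface< Scalar > > | m_step | ) |
inline |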
Set a maximization step.
Can be used in conjunction with the get_*_m_step() methods.
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
|
inline |
See the corresponding get_*_e_step() method.
|
inline |
See the corresponding get_*_m_step() method.
LDABuilder< Scalar > & ldaplusplus::LDABuilder< Scalar >::set_workers | ( | size_t | workers | ) |
Choose the number of parallel workers for the expectation step.