#include <LDA.hpp>

Public Member Functions
	LDA (std::shared_ptr< parameters::Parameters > model_parameters, std::shared_ptr< em::EStepInterface< Scalar > > e_step, std::shared_ptr< em::MStepInterface< Scalar > > m_step, size_t iterations=20, size_t workers=1)

	LDA (LDA &&lda)

void	fit (const Eigen::MatrixXi &X, const Eigen::VectorXi &y)

void	fit (const Eigen::MatrixXi &X)

void	partial_fit (const Eigen::MatrixXi &X, const Eigen::VectorXi &y)

void	partial_fit (std::shared_ptr< corpus::Corpus > corpus)

MatrixX	transform (const Eigen::MatrixXi &X)

MatrixX	decision_function (const Eigen::MatrixXi &X)

Eigen::VectorXi	predict (const Eigen::MatrixXi &X)

std::tuple< MatrixX, Eigen::VectorXi >	transform_predict (const Eigen::MatrixXi &X)

std::shared_ptr< events::EventDispatcherInterface >	get_event_dispatcher ()

const std::shared_ptr< parameters::Parameters >	model_parameters ()

template<typename P >
const std::shared_ptr< P >	model_parameters ()

Protected Member Functions
std::shared_ptr< corpus::Corpus >	get_corpus (const Eigen::MatrixXi &X, const Eigen::VectorXi &y)

std::shared_ptr< corpus::Corpus >	get_corpus (const Eigen::MatrixXi &X)

void	create_worker_pool ()

void	destroy_worker_pool ()

void	process_worker_events ()

std::tuple< std::shared_ptr< parameters::Parameters >, size_t >	extract_vp_from_queue ()

void	doc_e_step_worker ()

MatrixX	decision_function (const MatrixX &X)

Eigen::VectorXi	predict (const MatrixX &scores)

Detailed Description

template<typename Scalar = double>
class ldaplusplus::LDA< Scalar >

LDA contains the logic of using an expectation step, a maximization step and some model parameters to train and make use of an LDA model.

It is agnostic of the underlying implementations it uses and thus allows for experimentation through a common facade.
It uses multiple threads to compute the time consuming expectation step.
It aggregates all the events and redispatches them on the same thread through a single event dispatcher.
It provides a very simple interface (borrowed from scikit-learn).

Constructor & Destructor Documentation

template<typename Scalar>

ldaplusplus::LDA< Scalar >::LDA	(	std::shared_ptr< parameters::Parameters >	model_parameters,
		std::shared_ptr< em::EStepInterface< Scalar > >	e_step,
		std::shared_ptr< em::MStepInterface< Scalar > >	m_step,
		size_t	iterations = `20`,
		size_t	workers = `1`
	)

Create an LDA with the given model parameters, expectation and maximization steps, default number of iterations, and number of worker threads.

Parameters

model_parameters	A pointer to a struct containing the model parameters (for instance ModelParameters and SupervisedModelParameters)
e_step	A pointer to an expectation step implementation
m_step	A pointer to a maximization step implementation
iterations	The number of epochs to run when using LDA::fit
workers	The number of worker threads to create for computing the expectation step

template<typename Scalar>

ldaplusplus::LDA< Scalar >::LDA ( LDA< Scalar > && lda )

Move constructor that doesn't try to copy or move mutexes.

Member Function Documentation

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::create_worker_pool ( )

protected

Create a worker thread pool.

template<typename Scalar >

LDA< Scalar >::MatrixX ldaplusplus::LDA< Scalar >::decision_function ( const Eigen::MatrixXi & X )

Treat the SupervisedModelParameters::eta as a linear model and compute the distances from the planes of the documents in the topic space.

Use LDA::transform to obtain the \(\gamma\) for every document and then assume that the \(\eta\) parameters of the SupervisedModelParameters are a linear model. Compute the dot product between the normal vectors and the normalized topic mixtures for each document. The more positive the value for a given class, the more confident the model is that the document belongs to this class.

Parameters

X	The word counts in column-major order

Returns: A matrix of class scores (positive => confident) for each document

template<typename Scalar >

LDA< Scalar >::MatrixX ldaplusplus::LDA< Scalar >::decision_function ( const MatrixX & X )

protected

Implement the decision function using already transformed data, i.e. topic representations instead of bag-of-words (BOW) counts.

Parameters

X	The \(\gamma\) variational parameter for each document in a column-major ordered matrix.

Returns: A matrix of class scores (positive => confident) for each document

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::destroy_worker_pool ( )

protected

Destroy the worker thread pool.

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::doc_e_step_worker ( )

protected

A doc_e_step worker thread.

template<typename Scalar >

std::tuple< std::shared_ptr< parameters::Parameters >, size_t > ldaplusplus::LDA< Scalar >::extract_vp_from_queue ( )

protected

Extract the variational parameters and the document index from the worker queue.

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::fit	(	const Eigen::MatrixXi &	X,
		const Eigen::VectorXi &	y
	)

Compute a supervised topic model for word counts X and classes y.

Perform as many EM iterations as configured and stop when reaching max_iter_ or any other stopping criterion.

An EigenClassificationCorpus will be created from the passed parameters.

Parameters

X	The word counts in column-major order
y	The classes as integers

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::fit ( const Eigen::MatrixXi & X )

Compute an unsupervised topic model for word counts X.

Perform as many EM iterations as configured and stop when reaching max_iter_ or any other stopping criterion.

An EigenCorpus will be created from the passed parameters.

Parameters

X	The word counts in column-major order

template<typename Scalar >

std::shared_ptr< corpus::Corpus > ldaplusplus::LDA< Scalar >::get_corpus	(	const Eigen::MatrixXi &	X,
		const Eigen::VectorXi &	y
	)

protected

Generate a Corpus from a word count matrix X and a class vector y.

template<typename Scalar >

std::shared_ptr< corpus::Corpus > ldaplusplus::LDA< Scalar >::get_corpus ( const Eigen::MatrixXi & X )

protected

Generate a Corpus from just the word count matrix.

template<typename Scalar = double>

std::shared_ptr<events::EventDispatcherInterface> ldaplusplus::LDA< Scalar >::get_event_dispatcher ( )

inline

Get the event dispatcher for this LDA instance.

template<typename Scalar = double>

const std::shared_ptr<parameters::Parameters> ldaplusplus::LDA< Scalar >::model_parameters ( )

inline

Get a constant shared pointer to the model's parameters.

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::partial_fit	(	const Eigen::MatrixXi &	X,
		const Eigen::VectorXi &	y
	)

Perform a single EM iteration.

An EigenClassificationCorpus will be created from the passed parameters.

Parameters

X	The word counts in column-major order
y	The classes as integers

template<typename Scalar >

void ldaplusplus::LDA< Scalar >::partial_fit ( std::shared_ptr< corpus::Corpus > corpus )

Perform a single EM iteration.

Parameters

corpus	The implementation of Corpus that contains the observed variables.

template<typename Scalar >

Eigen::VectorXi ldaplusplus::LDA< Scalar >::predict ( const Eigen::MatrixXi & X )

Use the model to predict the class indexes for the word counts X.

Use LDA::decision_function to get class scores and then compute the argmax for every document.

Parameters

X	The word counts in column-major order

Returns: A vector of class indexes (the predicted class for each document)

template<typename Scalar >

Eigen::VectorXi ldaplusplus::LDA< Scalar >::predict ( const MatrixX & scores )

protected

Transform the decision function to class predictions.

template<typename Scalar = double>

void ldaplusplus::LDA< Scalar >::process_worker_events ( )

inline protected

Forward the events generated in the worker threads to this event dispatcher in this thread.

template<typename Scalar >

LDA< Scalar >::MatrixX ldaplusplus::LDA< Scalar >::transform ( const Eigen::MatrixXi & X )

Run the expectation step and return the topic mixtures for the documents defined by the word counts X.

Parameters

X	The word counts in column-major order

Returns: The variational parameter \(\gamma\) for every document that approximates the count of words generated by each topic

template<typename Scalar >

std::tuple< typename LDA< Scalar >::MatrixX, Eigen::VectorXi > ldaplusplus::LDA< Scalar >::transform_predict ( const Eigen::MatrixXi & X )

Return both the class predictions and the transformed data using a single LDA expectation step.

Parameters

X	The word counts in column-major order

Returns: A tuple containing a matrix with the \(\gamma\) variational parameters for every document and a vector containing the class index predictions for every document.

The documentation for this class was generated from the following files:

include/ldaplusplus/events/ProgressEvents.hpp
include/ldaplusplus/LDA.hpp
src/ldaplusplus/LDA.cpp

Public Member Functions

Protected Member Functions

Detailed Description

template<typename Scalar = double> class ldaplusplus::LDA< Scalar >

Constructor & Destructor Documentation

Member Function Documentation

template<typename Scalar = double>
class ldaplusplus::LDA< Scalar >