<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="FeedCreator 1.8" -->
<?xml-stylesheet href="https://jlab.soe.ucsc.edu/nlp-wiki/lib/exe/css.php?s=feed" type="text/css"?>
<rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
    xmlns:dc="http://purl.org/dc/elements/1.1/">
    <channel rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/feed.php">
        <title>NLP Wiki ml</title>
        <description></description>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/</link>
        <image rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/lib/tpl/dokuwiki/images/favicon.ico" />
        <dc:date>2026-05-13T20:47:04+00:00</dc:date>
        <items>
            <rdf:Seq>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:active_learning&amp;rev=1756357723&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:alternative_training_methods&amp;rev=1691784341&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:application_optimization&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:autoencoders&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automatic_theorem_proving&amp;rev=1759847509&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automl&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:bayesian_methods&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:biological_nns&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:catastrophic_forgetting&amp;rev=1749861121&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:classification&amp;rev=1749252335&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:cloud_computing_platforms&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:computer_use_agents&amp;rev=1765914122&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_computation&amp;rev=1742960607&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_random_field&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:confidence&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:continual_learning&amp;rev=1738370722&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:contrastive_learning&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:curriculum_learning&amp;rev=1719545274&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_augmentation&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_cleaning_and_validation&amp;rev=1748818819&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:deep_learning&amp;rev=1744090306&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:diffusion_models&amp;rev=1749684613&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distributed_training&amp;rev=1748503132&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distribution_shift&amp;rev=1690397979&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:edge_computing&amp;rev=1743019996&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:educational_framework_edf&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:efficient_nns&amp;rev=1746598622&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:em_algorithm&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ensembling&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:extreme_multi-label_classification&amp;rev=1749252300&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fairness&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:few-shot_learning&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fine-tuning&amp;rev=1752478659&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gans&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gpu_deep_learning&amp;rev=1752722752&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gradient_clipping&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graphical_models&amp;rev=1746236620&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graph_nn&amp;rev=1745022019&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:green_ai&amp;rev=1765361571&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:history_of_ml&amp;rev=1707901677&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hugging_face&amp;rev=1700689695&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hyperparameter_tuning&amp;rev=1741256430&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:image_generation&amp;rev=1749111051&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:infinite_neural_networks&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:information_theory&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:knowledge_distillation&amp;rev=1747037464&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:large-scale&amp;rev=1725669812&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_rate&amp;rev=1707179492&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_to_rank&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_with_noise&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:log_linear_models&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:loss_functions&amp;rev=1721694759&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mechanistic_interpretability&amp;rev=1748863421&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:membership_inference&amp;rev=1772922257&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:meta-learning&amp;rev=1699559084&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:miscellaneous_neural_networks&amp;rev=1709864041&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mixture_of_expert_models&amp;rev=1748677248&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_glossary&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_outline&amp;rev=1765914042&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_overview&amp;rev=1732744618&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_compression&amp;rev=1747040456&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_editing_and_unlearning&amp;rev=1751872429&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_selection&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:modularity&amp;rev=1709864194&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:multi-task_learning&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_architecture_search&amp;rev=1746434615&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_networks&amp;rev=1744090293&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_network_psychology&amp;rev=1743035611&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_architectures&amp;rev=1742888085&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_initialization&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_sparsity&amp;rev=1746640793&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_training&amp;rev=1720564168&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_tricks&amp;rev=1697062740&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:normalization&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization&amp;rev=1709762240&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization_in_deep_learning&amp;rev=1742863782&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimizers&amp;rev=1743019366&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:privacy&amp;rev=1737484903&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:probabilistic_logic&amp;rev=1688604445&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:program_induction&amp;rev=1702683190&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:pytorch&amp;rev=1699564213&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:quantum_machine_learning&amp;rev=1759817717&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:regularization&amp;rev=1710401906&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:reinforcement_learning&amp;rev=1752471605&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:sampling&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:scaling_laws&amp;rev=1748819368&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-play&amp;rev=1686941992&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-supervised_learning&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:semi-supervised_learning&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:software&amp;rev=1760031629&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:state-space_models&amp;rev=1755885492&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:support_vector_machines&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:systems_ml&amp;rev=1752819997&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:tensorflow&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:theory&amp;rev=1686814574&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:trustworthy_ai&amp;rev=1748625055&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:visualizing_neural_networks&amp;rev=1719547132&amp;do=diff"/>
                <rdf:li rdf:resource="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:zero-shot_learning&amp;rev=1686814574&amp;do=diff"/>
            </rdf:Seq>
        </items>
    </channel>
    <image rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/lib/tpl/dokuwiki/images/favicon.ico">
        <title>NLP Wiki</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/</link>
        <url>https://jlab.soe.ucsc.edu/nlp-wiki/lib/tpl/dokuwiki/images/favicon.ico</url>
    </image>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:active_learning&amp;rev=1756357723&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-08-28T05:08:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:active_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:active_learning&amp;rev=1756357723&amp;do=diff</link>
        <description>Active Learning

Overviews

	*  Settles 2009 - Active Learning Literature Survey
	*  Zhan et al 2022 - A Comparative Survey of Deep Active Learning
	*  Cacciarelli &amp; Kulahci 2023 - Active Learning for Data Streams: A Survey
	*  Wan et al 2023 - A Survey of Deep Active Learning for Foundation Models
	*  In NLP
		*  Zhang et al 2022 - A Survey of Active Learning for Natural Language Processing


Papers

	*  Core-Set: Sener &amp; Savarese 2017- Active Learning for Convolutional Neural Networks: A Core-…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:alternative_training_methods&amp;rev=1691784341&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-08-11T20:05:41+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:alternative_training_methods</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:alternative_training_methods&amp;rev=1691784341&amp;do=diff</link>
        <description>Neural Networks: Alternative Training Methods

Papers

	*  Unnikrishnan &amp; Venugopal 1994 - Alopex: A Correlation-Based Learning Algorithm for Feedforward and Recurrent Neural Networks pdf A stochastic training algorithm that does not use gradients, but instead looks at how stochastic changes in the weights change the loss function. Paper claims it can be used with discontinuous activation functions. This is a local search optimization method, similar to simulated annealing. Very simple to implem…</description>
    </item>
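    <!--
        A minimal sketch of an Alopex-style gradient-free update as described in the
        item above: every weight moves by a fixed step, with the direction drawn from
        the correlation between the previous weight change and the previous change in
        loss.  The constants and the numpy formulation are illustrative assumptions,
        not taken from the paper.

        import numpy as np

        def alopex_step(w, loss_fn, prev_dw, prev_loss, delta=0.01, T=0.1):
            """One Alopex-style update: no gradients, only loss evaluations."""
            loss = loss_fn(w)
            corr = prev_dw * (loss - prev_loss)   # per-weight correlation
            p = 1.0 / (1.0 + np.exp(corr / T))    # probability of a +delta move
            dw = delta * np.where(np.random.rand(*w.shape) < p, 1.0, -1.0)
            return w + dw, dw, loss
    -->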
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:application_optimization&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:application_optimization</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:application_optimization&amp;rev=1686814574&amp;do=diff</link>
        <description>Application: Optimization

Applications of machine learning in optimization.

Papers

	*  2018 - Machine Learning for Combinatorial Optimization: A Methodological Tour d’Horizon

Related Pages

	*  Meta-Learning
		*  For example Andrychowicz et al 2016 - Learning to Learn by Gradient Descent by Gradient Descent and Chen 2017 - Learning to Learn without Gradient Descent by Gradient Descent

	*  Optimization</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:autoencoders&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:autoencoders</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:autoencoders&amp;rev=1686814574&amp;do=diff</link>
        <description>Autoencoders

Variational Autoencoders

VAEs are not really autoencoders, except that the training algorithm resembles that of an autoencoder. Perhaps this section should be moved somewhere else.

	*  Kingma &amp; Welling 2013 - Auto-Encoding Variational Bayes Introduced Variational Autoencoders
	*  Doersch 2016 - Tutorial on Variational Autoencoders</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automatic_theorem_proving&amp;rev=1759847509&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-10-07T14:31:49+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:automatic_theorem_proving</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automatic_theorem_proving&amp;rev=1759847509&amp;do=diff</link>
        <description>Automatic Theorem Proving

Overviews

	*  Jordan Meadows &amp; André Freitas 2022 - A Survey in Mathematical Language Processing
	*  Lu et al 2022 - A Survey of Deep Learning for Mathematical Reasoning
	*  Li et al 2024 - Survey on Deep Learning for Theorem Proving
	*  Formalizing mathematics
		*  Kaliszyk &amp; Rabe 2020 - A Survey of Languages for Formalizing Mathematics

	*  Autoformalization
		*  Weng et al 2025 - Autoformalization in the Era of Large Language Models: A Survey


Papers

	*  Cramer e…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automl&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:automl</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:automl&amp;rev=1686814574&amp;do=diff</link>
        <description>See AutoML.</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:bayesian_methods&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:bayesian_methods</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:bayesian_methods&amp;rev=1686814574&amp;do=diff</link>
        <description>Bayesian Methods

Bayesian methods are a sub-field of statistics (Bayesian statistics), and are often used in machine learning.  Bayesian methods assume a prior over the space of models (the prior belief), and after observing the data, update the belief over the space of models (the posterior belief).  The posterior belief can be used to make predictions, etc.  (This is in contrast to frequentist methods that are</description>
    </item>
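    <!--
        In symbols, the belief update described in the item above is Bayes' rule:
        the posterior over models is the prior reweighted by the likelihood of the
        observed data (standard notation, not taken from the page):

        p(\theta \mid D) = \frac{p(D \mid \theta)\, p(\theta)}{p(D)},
        \qquad p(D) = \int p(D \mid \theta)\, p(\theta)\, d\theta
    -->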
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:biological_nns&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:biological_nns</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:biological_nns&amp;rev=1686814574&amp;do=diff</link>
        <description>Biologically-Inspired Neural Networks

Papers

	*  Bartunov et al 2018 - Assessing the Scalability of Biologically-Motivated Deep Learning Algorithms and Architectures
	*  Lillicrap et al 2020 - Backpropagation and The Brain</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:catastrophic_forgetting&amp;rev=1749861121&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-14T00:32:01+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:catastrophic_forgetting</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:catastrophic_forgetting&amp;rev=1749861121&amp;do=diff</link>
        <description>Catastrophic Forgetting

Catastrophic forgetting occurs when a neural network trained on one task loses the ability to perform it after being trained on a second task.

Overviews

	*  Kirkpatrick et al 2016 - Overcoming catastrophic forgetting in neural networks
	*  Wang et al 2023 - A Comprehensive Survey of Forgetting in Deep Learning Beyond Continual Learning
	*  2023 - Catastrophic Forgetting in Deep Learning: A Comprehensive Taxonomy

Papers

	*  EWC: Kirkpatrick et al 2016 - Overcoming catastrophic f…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:classification&amp;rev=1749252335&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-06T23:25:35+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:classification</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:classification&amp;rev=1749252335&amp;do=diff</link>
        <description>Classification

Classification NLP Tasks

	*  Binary classification
		*  Argumentative Text Understanding
		*  Fake News Detection
		*  Hate Speech Detection
		*  Paraphrase Identification
		*  Offensive Language Detection
		*  Sentiment Analysis
		*  Spam Detection
		*  Text Classification

	*  Multi-class classification
		*  Argumentative Text Understanding
		*  Language Identification
		*  Recognizing Textual Entailment (RTE)
		*  Text Classification
		*  Word-Sense Disambiguation


NLP Datas…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:cloud_computing_platforms&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:cloud_computing_platforms</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:cloud_computing_platforms&amp;rev=1686814574&amp;do=diff</link>
        <description>Cloud Computing Platforms

Commercial

	*  CoreWeave Low prices, good GPUs.  Used by Eleuther to train GPT-NeoX

Research / Academic

	*  Google's TPU Research Cloud</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:computer_use_agents&amp;rev=1765914122&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-12-16T19:42:02+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:computer_use_agents</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:computer_use_agents&amp;rev=1765914122&amp;do=diff</link>
        <description>Computer Use Agents

Conferences and Workshops

	*  Workshop on Computer Use Agents at ICML 2025

Related Pages

	*  Autonomous Language Agents</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_computation&amp;rev=1742960607&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-26T03:43:27+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:conditional_computation</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_computation&amp;rev=1742960607&amp;do=diff</link>
        <description>Dynamic NNs and Conditional Computation

Dynamic neural networks use methods such as conditional computation, adaptive computation, dynamic model sparsification, and early-exit approaches to build larger models with lower compute requirements.

Overviews</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_random_field&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:conditional_random_field</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:conditional_random_field&amp;rev=1686814574&amp;do=diff</link>
        <description>Conditional Random Fields

Conditional random fields (CRFs) are state of the art for sequence labeling tasks.

Papers

	*  Thai et al 2018 - Embedded-State Latent Conditional Random Fields for Sequence Labeling

Related Pages

	*  Graphical Models</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:confidence&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:confidence</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:confidence&amp;rev=1686814574&amp;do=diff</link>
        <description>Confidence

Evaluation Measures

TODO: literature review for evaluation measures of confidence scores.

In NLP

(search ACL Anthology for “confidence scores”)

	*  Culotta &amp; McCallum 2003 - Confidence Estimation for Information Extraction Uses three evaluation metrics of confidence scores:
		*  “Pearson’s r, a correlation coefficient ranging from -1 to 1 that measures the correlation between a confidence score and whether or not the field (or record) is correctly labeled.</description>
    </item>
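    <!--
        A sketch of the first measure quoted in the item above: Pearson's r between a
        model's confidence scores and binary correctness labels (numpy formulation
        assumed).

        import numpy as np

        def confidence_pearson_r(conf, correct):
            """Correlation between confidence and whether the prediction is correct."""
            conf = np.asarray(conf, dtype=float)
            correct = np.asarray(correct, dtype=float)
            return np.corrcoef(conf, correct)[0, 1]
    -->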
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:continual_learning&amp;rev=1738370722&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-02-01T00:45:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:continual_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:continual_learning&amp;rev=1738370722&amp;do=diff</link>
        <description>Continual Learning

Overviews

	*  Wang et al 2023 - A Comprehensive Survey of Continual Learning: Theory, Method and Application

Papers

In NLP

	*  Ermis Memory Efficient Continual Learning with Transformers
	*  Zheng et al 2025 - Spurious Forgetting in Continual Learning of Language Models Argues that the drop in performance while doing continued training “often reflects a decline in task alignment rather than knowledge loss”

Related Pages

	*  Catastrophic Forgetting
	*  Lifelong Learning…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:contrastive_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:contrastive_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:contrastive_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Contrastive Learning

Contrastive learning is an effective way to learn representations.  It is similar to self-supervised representation learning, but uses positive and negative examples.

Overviews

	*  Jaiswal et al 2020 - A Survey on Contrastive Self-supervised Learning
	*  Le-Khac et al 2020 - Contrastive Representation Learning: A Framework and Review

Tutorials

	*  NAACL 2022 Tutorial Great tutorial

Related Pages</description>
    </item>
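    <!--
        A minimal sketch of a contrastive objective of the kind described in the item
        above: an InfoNCE-style loss over one positive and several negatives.  The
        dot-product similarity and the temperature value are illustrative choices.

        import numpy as np

        def info_nce(anchor, positive, negatives, tau=0.1):
            """Pull the positive toward the anchor, push negatives away."""
            sims = np.array([anchor @ positive] + [anchor @ n for n in negatives]) / tau
            sims = sims - sims.max()              # numerical stability
            return -np.log(np.exp(sims[0]) / np.exp(sims).sum())
    -->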
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:curriculum_learning&amp;rev=1719545274&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-06-28T03:27:54+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:curriculum_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:curriculum_learning&amp;rev=1719545274&amp;do=diff</link>
        <description>Curriculum Learning

Curriculum learning (CL) is where a neural network is trained on easier examples before training on harder examples.  The method for deciding which examples to train on at different times is called the curriculum, and uses a measure of difficulty for the data points.  As an example, one can train on shorter sentences before adding longer sentences to the training.  CL can help the model learn features that generalize better, and help the model learn faster.  For an overview,…</description>
    </item>
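    <!--
        A minimal sketch of the shorter-sentences-first curriculum mentioned in the
        item above, assuming a list of raw sentence strings; the three-stage pool
        scheme is an illustrative choice.

        def length_curriculum(sentences, n_stages=3):
            """Rank easy-to-hard by length; yield progressively larger pools."""
            ranked = sorted(sentences, key=lambda s: len(s.split()))
            for stage in range(1, n_stages + 1):
                # stage 1 trains on the shortest third, stage 2 on two thirds, ...
                yield ranked[: stage * len(ranked) // n_stages]
    -->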
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_augmentation&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:data_augmentation</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_augmentation&amp;rev=1686814574&amp;do=diff</link>
        <description>Data Augmentation

Papers

	*  Wang et al 2022 - PromDA: Prompt-based Data Augmentation for Low-Resource NLU Tasks

Related Pages

	*  Contrastive Learning
	*  Domain Adaptation
	*  NLP - Data Augmentation</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_cleaning_and_validation&amp;rev=1748818819&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-01T23:00:19+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:data_cleaning_and_validation</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:data_cleaning_and_validation&amp;rev=1748818819&amp;do=diff</link>
        <description>Data Cleaning and Validation

Overviews

	*  Data Lifecycle
		*  Polyzotis et al 2018 - Data Lifecycle Challenges in Production Machine Learning: A Survey


Data Cleaning

	*  Krishnan et al 2017 - BoostClean: Automated Error Detection and Repair for Machine Learning (searched “data cleaning ensembling machine learning” on Google Scholar)
	*  2017 - Data Quality Considerations for Big Data and Machine Learning: Going Beyond Data Cleaning and Transformations
	*  Liu &amp; Guo 2020 - Peer Loss Functio…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:deep_learning&amp;rev=1744090306&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-04-08T05:31:46+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:deep_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:deep_learning&amp;rev=1744090306&amp;do=diff</link>
        <description>Neural Networks and Deep Learning

Introductions and Overviews

	*  Mehta et al 2018 - A high-bias, low-variance introduction to Machine Learning for physicists

Books

Expressive Power

	*  [Representation of CNF and DNF by a neural net]

Related Pages

	*  ML Overview
	*  ML Glossary
	*  Neural Networks</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:diffusion_models&amp;rev=1749684613&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-11T23:30:13+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:diffusion_models</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:diffusion_models&amp;rev=1749684613&amp;do=diff</link>
        <description>Diffusion Models

Overviews

	*  Yang et al 2022 - Diffusion Models: A Comprehensive Survey of Methods and Applications
	*  Tutorials
		*  Nakkiran et al 2024 - Step-by-Step Diffusion: An Elementary Tutorial
		*  McAllester 2023 - On the Mathematics of Diffusion Models

	*  In NLP
		*  Zhu &amp; Zhao 2023 - Diffusion Models in NLP: A Survey
		*  Zou et al 2023 - Survey of Diffusion Models in Natural Language Processing


Key Papers

	*  Ho et al 2020 - Denoising Diffusion Probabilistic Models The on…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distributed_training&amp;rev=1748503132&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-29T07:18:52+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:distributed_training</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distributed_training&amp;rev=1748503132&amp;do=diff</link>
        <description>Distributed Training and Inference

Overviews

	*  Concise summary in the introduction and related work here: Aji 2017
	*  For a modern overview, see section 3.4 of Liu et al 2024 - Understanding LLMs: A Comprehensive Overview from Training to Inference
	*  A good overview is in section 3.3.2 of the The Llama 3 technical report Uses tensor parallelism, pipeline parallelism, context parallelism and data parallelism.</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distribution_shift&amp;rev=1690397979&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-07-26T18:59:39+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:distribution_shift</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:distribution_shift&amp;rev=1690397979&amp;do=diff</link>
        <description>Distribution Shift

Also known as covariate shift (a shift in the distribution of the input variables between training and testing); see Bickel 2009.

Papers

	*  Bickel et al 2009 - Discriminative Learning Under Covariate Shift
	*  Duchi 2018 - Learning Models with Uniform Performance via Distributionally Robust Optimization
	*  Duchi et al 2020 - Distributionally Robust Losses for Latent Covariate Mixtures
	*  Zhou et al 2022 - Model Agnostic Sample Reweighting for Out-of-Distribution Learning

…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:edge_computing&amp;rev=1743019996&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-26T20:13:16+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:edge_computing</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:edge_computing&amp;rev=1743019996&amp;do=diff</link>
        <description>Edge Computing for Neural Networks

Sometimes called tiny machine learning (TinyML), which is machine learning for resource-constrained devices.

Overviews

	*  Voghoei et al 2019 - Deep Learning at the Edge
	*  2019 - Deep Learning With Edge Computing: A Review
	*  Wang et al 2019 - Convergence of Edge Computing and Deep Learning: A Comprehensive Survey
	*  Xu et al 2020 - Edge Intelligence: Architectures, Challenges, and Applications
	*  2023 - Efficient Neural Networks for Tiny Machine Learni…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:educational_framework_edf&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:educational_framework_edf</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:educational_framework_edf&amp;rev=1686814574&amp;do=diff</link>
        <description>Educational Framework

The Educational Framework (EDF) is a deep learning framework for educational purposes written in 150 lines of Python and NumPy code.  It was written by David McAllester for the course TTIC 31230: Fundamentals of Deep Learning (Fall 2020, with videos).

	*  Slides
	*  Code

Here is the complete source code for the framework:</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:efficient_nns&amp;rev=1746598622&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-07T06:17:02+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:efficient_nns</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:efficient_nns&amp;rev=1746598622&amp;do=diff</link>
        <description>Efficient Neural Networks

Methods for improving the efficiency of neural networks.

Overviews

	*  General
		*  Sze et al 2017 - Efficient Processing of Deep Neural Networks: A Tutorial and Survey

	*  For LLMs
		*  Wan et al 2023 - Efficient Large Language Models: A Survey
		*  Zhou et al 2024 - A Survey on Efficient Inference for Large Language Models
		*  Reasoning LLMs
			*  Wang et al 2025 - Harnessing the Reasoning Economy: A Survey of Efficient Reasoning for Large Language Models



Effi…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:em_algorithm&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:em_algorithm</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:em_algorithm&amp;rev=1686814574&amp;do=diff</link>
        <description>EM Algorithm

Older Papers

	*  Hard EM (also called “Viterbi training” or “sparse EM”)
		*  Neal &amp; Hinton 1998 - A view of the EM algorithm that justifies incremental, sparse, and other variants Calls it “sparse EM”
		*  Spitkovsky et al 2010 - Viterbi Training Improves Unsupervised Dependency Parsing
		*  Cohen &amp; Smith 2010 - Viterbi Training for PCFGs: Hardness Results and Competitiveness of Uniform Initialization


Recent Papers

	*  Nishida &amp; Nakayama 2020 - Unsupervised discourse constitue…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ensembling&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:ensembling</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ensembling&amp;rev=1686814574&amp;do=diff</link>
        <description>Ensembling

Ensembling combines several models to improve generalization performance.  For example, ensembling models trained with different random seeds almost always improves performance.  This technique is often used when performance is the main objective, such as in competitions like</description>
    </item>
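    <!--
        A minimal sketch of the seed-ensembling idea in the item above: average the
        class probabilities of several independently trained models.  It assumes each
        model exposes a scikit-learn-style predict_proba method.

        import numpy as np

        def ensemble_predict(models, x):
            """Average predicted probabilities, then take the argmax class."""
            probs = np.mean([m.predict_proba(x) for m in models], axis=0)
            return probs.argmax(axis=-1)
    -->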
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:extreme_multi-label_classification&amp;rev=1749252300&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-06T23:25:00+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:extreme_multi-label_classification</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:extreme_multi-label_classification&amp;rev=1749252300&amp;do=diff</link>
        <description>Extreme Multi-Label Classification

Extreme multi-label classification (or extreme multi-label learning, XML) is the task of matching an input with 0 or more labels (the most relevant labels) from an extremely large label set.  The space of outputs is $2^L$, where $L$ is the size of the label set</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fairness&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:fairness</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fairness&amp;rev=1686814574&amp;do=diff</link>
        <description>Fairness

See related work on fairness in Duchi 2020.

	*  Dwork, et al 2011 - Fairness Through Awareness
	*  Hardt et al 2016 - Equality of Opportunity in Supervised Learning  “We show how to optimally adjust any learned predictor so as to remove discrimination according to our definition.”
	*  Kilbertus et al 2017 - Avoiding Discrimination through Causal Reasoning
	*  Kearns et al 2017 - Preventing Fairness Gerrymandering: Auditing and Learning for Subgroup Fairness
	*  Bird et al 2020 - Fairl…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:few-shot_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:few-shot_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:few-shot_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Few-Shot Learning

Papers

	*  Miller et al 2000 - Learning from One Example Through Shared Densities on Transforms
	*  Lake et al 2011 - One Shot Learning of Simple Visual Concepts
	*  Koch et al 2015 - Siamese Neural Networks for One-shot Image Recognition
	*  Ravi &amp; Larochelle 2016 - Optimization as a Model for Few-Shot Learning
	*  Vinyals et al 2016 - Matching Networks for One Shot Learning
	*  Snell et al 2017 - Prototypical Networks for Few-shot Learning
	*  Sung et al 2018 - Learning to …</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fine-tuning&amp;rev=1752478659&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-14T07:37:39+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:fine-tuning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:fine-tuning&amp;rev=1752478659&amp;do=diff</link>
        <description>Fine-Tuning

This page lists fine-tuning methods such as Adaptors, LoRA, BitFit, NoisyTune, etc.

Overviews

	*  Mosbach et al 2020 - On the Stability of Fine-tuning BERT: Misconceptions, Explanations, and Strong Baselines Gives a good baseline setting of hyperparameters for tuning BERT in section 6: fine-tune using ADAM with bias correction and a learning rate of 2e−5 for 20 epochs, with learning rate linearly increased for the first 10% of steps and linearly decayed to zero afterward.</description>
    </item>
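    <!--
        A sketch of the fine-tuning schedule quoted in the item above: linear warmup
        over the first 10% of steps to a peak of 2e-5, then linear decay to zero.
        The step-based formulation is an assumption.

        def lr_at(step, total_steps, peak_lr=2e-5, warmup_frac=0.10):
            warmup_steps = int(warmup_frac * total_steps)
            if step < warmup_steps:
                return peak_lr * step / max(1, warmup_steps)
            return peak_lr * (total_steps - step) / max(1, total_steps - warmup_steps)
    -->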
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gans&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:gans</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gans&amp;rev=1686814574&amp;do=diff</link>
        <description>Generative Adversarial Networks

Papers

	*  Hu et al 2017 - On Unifying Deep Generative Models Unifies GANs and Variational Autoencoders (VAEs) with the sleep-wake algorithm, and improves upon them.  See Eric's slides in his course &lt;http://www.cs.cmu.edu/~epxing/Class/10708-20/lectures.html&gt; Feb 24 and 26 for more details.
	*  Arjovsky &amp; Bottou 2017 - Towards Principled Methods for Training Generative Adversarial Networks
	*  WGANs: Arjovsky et al 2017 - Wasserstein Generative Adversarial Netwo…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gpu_deep_learning&amp;rev=1752722752&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-17T03:25:52+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:gpu_deep_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gpu_deep_learning&amp;rev=1752722752&amp;do=diff</link>
        <description>GPU Deep Learning

Overviews

	*  NLP 202 - Deep Learning on GPUs (start at slide 11)
	*  LLMs
		*  Zhou et al 2024 - A Survey on Efficient Inference for Large Language Models


Details of Deep Learning on GPUs

	*  NVidia Deep Learning Performance Documentation
		*  Introduction to Deep Learning on GPUs (key part here)
			*  Pdf version: GPU Performance Background User's Guide

		*  Matrix Multiplication Background
		*  Feedforward neural networks
				*  “Choose batch sizes and neuron counts gr…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gradient_clipping&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:gradient_clipping</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:gradient_clipping&amp;rev=1686814574&amp;do=diff</link>
        <description>Gradient Clipping

See section 10.11 here.

Papers

	*  Zhang et al 2020 - Why Gradient Clipping Accelerates Training: A Theoretical Justification for Adaptivity
	*  An extreme form of gradient clipping, where everything gets clipped, is the “Manhattan-Learning rule” (see the [Rprop paper]).  Rprop is an advancement over this.

Blog Posts

	*  What is gradient clipping

Related Pages

	*  Optimizers</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graphical_models&amp;rev=1746236620&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-03T01:43:40+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:graphical_models</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graphical_models&amp;rev=1746236620&amp;do=diff</link>
        <description>Graphical Models

Graphical models (or probabilistic graphical models, PGMs) are a sub-area of machine learning and statistics.  PGMs are a framework for representing independence assumptions of random variables in probability distributions.  Broadly, the study of PGMs includes algorithms for learning and inference in these complex probability distributions.  PGMs have applications in machine learning, statistics, natural language processing, speech recognition, computer vision, robo…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graph_nn&amp;rev=1745022019&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-04-19T00:20:19+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:graph_nn</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:graph_nn&amp;rev=1745022019&amp;do=diff</link>
        <description>Graph Neural Networks

Overviews

Best overview: Wu et al 2021 - Graph Neural Networks for Natural Language Processing: A Survey

	*  General surveys (not just NLP):
		*  Wu et al 2019 - A Comprehensive Survey on Graph Neural Networks
		*  Liu  &amp; Zhou 2020 - Introduction to Graph Neural Networks
		*  Wu et al 2020 - A Comprehensive Survey on Graph Neural Networks
		*  [Graph Neural Networks: A Review of Methods and Applications]

	*  NLP Surveys:
		*  Wu et al 2021 - Graph Neural Networks for Na…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:green_ai&amp;rev=1765361571&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-12-10T10:12:51+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:green_ai</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:green_ai&amp;rev=1765361571&amp;do=diff</link>
        <description>Green AI

Papers

	*  Strubell et al 2019 - Energy and Policy Considerations for Deep Learning in NLP
	*  Patterson et al 2021 - Carbon Emissions and Large Neural Network Training
	*  2024 - Toward Sustainable GenAI using Generation Directives for Carbon-Friendly Large Language Model Inference
	*  Morrison et al 2025 - Holistically Evaluating the Environmental Impact of Creating Language Models

Workshops and Conferences

	*  NLP
		*  Workshop on Simple and Efficient Natural Language Processing …</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:history_of_ml&amp;rev=1707901677&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-02-14T09:07:57+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:history_of_ml</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:history_of_ml&amp;rev=1707901677&amp;do=diff</link>
        <description>History of Machine Learning

Early History of ML (prior to 1980)

	*  First use of the term “machine learning”: Arthur Samuel 1959 “Machine Learning is the field of study that gives computers the ability to learn without being explicitly programmed.”  Caveat: see $\xi^{(s)}$ and $\rho_s$</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hugging_face&amp;rev=1700689695&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-11-22T21:48:15+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:hugging_face</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hugging_face&amp;rev=1700689695&amp;do=diff</link>
        <description>Hugging Face

Hugging Face Transformers

	*  Transformers
	*  Notebooks
		*  &lt;https://github.com/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb&gt;
		*  Training transformer models for QA notebook github
		*  Text classification with BERT example The notebook Ritu started with

	*  Misc
		*  Using a custom model with trainer
			*  Resources for using custom models with trainer
				*  See the code here




Courses

	*  HuggingFace Youtube playlists (contains playlists of courses)
		*  …</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hyperparameter_tuning&amp;rev=1741256430&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-06T10:20:30+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:hyperparameter_tuning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:hyperparameter_tuning&amp;rev=1741256430&amp;do=diff</link>
        <description>Hyperparameter Tuning

Random search within a bounding box is a good baseline method (Bergstra 2012).  Bayesian optimization methods can also be applied, see here for software implementations.  See also Wikipedia - Hyperparameter Optimization.  When publishing, it is recommended to report the method of tuning hyperparameters, the bounding box, and number of hyperparameter evaluations (</description>
    </item>
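    <!--
        A minimal sketch of random search within a bounding box, the baseline
        recommended in the item above.  Uniform sampling is illustrative; scale
        parameters such as learning rates are often sampled log-uniformly instead.

        import random

        def random_search(objective, box, n_evals=50):
            """box maps each hyperparameter name to a (low, high) range."""
            best, best_score = None, float("inf")
            for _ in range(n_evals):
                cfg = {name: random.uniform(lo, hi) for name, (lo, hi) in box.items()}
                score = objective(cfg)
                if score < best_score:
                    best, best_score = cfg, score
            return best, best_score
    -->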
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:image_generation&amp;rev=1749111051&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-05T08:10:51+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:image_generation</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:image_generation&amp;rev=1749111051&amp;do=diff</link>
        <description>Image Generation

Papers

	*  Mansimov et al 2015 - Generating Images from Captions with Attention
	*  Reed et al 2016 - Generative Adversarial Text to Image Synthesis
	*  Gafni et al 2022 - Make-A-Scene: Scene-Based Text-to-Image Generation with Human Priors
		*  Used in the MLLM Chameleon


Related Pages

	*  Diffusion Models</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:infinite_neural_networks&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:infinite_neural_networks</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:infinite_neural_networks&amp;rev=1686814574&amp;do=diff</link>
        <description>Infinite Neural Networks

Infinite neural networks are neural networks that have an infinite number of hidden units or an infinite number of layers.

Overviews

	*  Neural Tangent Kernel
		*  Understanding the Neural Tangent Kernel (blog post)
		*  Some Math behind Neural Tangent Kernel (blog post)

	*  Lee et al 2020 - Finite Versus Infinite Neural Networks: an Empirical Study

Papers

	*  Unbounded Depth NNs: Nazaret &amp; Blei 2022 - Variational Inference for Infinitely Deep Neural Networks

Neur…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:information_theory&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:information_theory</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:information_theory&amp;rev=1686814574&amp;do=diff</link>
        <description>Information Theory

Recent Developments

	*  V-information: Xu et al 2020 - A Theory of Usable Information under Computational Constraints. A better measure of mutual information. Used in O’Connor 2021</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:knowledge_distillation&amp;rev=1747037464&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-12T08:11:04+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:knowledge_distillation</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:knowledge_distillation&amp;rev=1747037464&amp;do=diff</link>
        <description>Knowledge Distillation

Various papers related to distillation.  From Iandola 2020: “While the term 'knowledge distillation' was coined by Hinton et al. 2015 to describe a specific method and equation, the term 'distillation' is now used in reference to a diverse range of approaches where a 'student' network is trained to replicate a 'teacher' network.</description>
    </item>
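    <!--
        The item above notes that "distillation" now covers a diverse range of
        approaches; as one concrete instance, a sketch of the original soft-label
        formulation, where the student matches the teacher's temperature-softened
        output distribution.  The numpy formulation and temperature are illustrative.

        import numpy as np

        def softmax(z, T=1.0):
            z = z / T
            z = z - z.max(axis=-1, keepdims=True)   # numerical stability
            e = np.exp(z)
            return e / e.sum(axis=-1, keepdims=True)

        def distillation_loss(student_logits, teacher_logits, T=2.0):
            """Cross-entropy from the teacher's softened distribution to the student's."""
            p_teacher = softmax(teacher_logits, T)
            log_p_student = np.log(softmax(student_logits, T))
            return -(p_teacher * log_p_student).sum(axis=-1).mean()
    -->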
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:large-scale&amp;rev=1725669812&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-09-07T00:43:32+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:large-scale</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:large-scale&amp;rev=1725669812&amp;do=diff</link>
        <description>Large-Scale ML

Large-Scale NLP

Tutorial: Ilharco et al 2020 - High Performance Natural Language Processing (See the bibliography)

Conferences and Workshops

	*  MLSys

Related Pages

	*  Distributed Training
	*  Model Compression
	*  GPU Deep Learning
	*  Neural Architecture Search
	*  Systems &amp; ML</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_rate&amp;rev=1707179492&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-02-06T00:31:32+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:learning_rate</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_rate&amp;rev=1707179492&amp;do=diff</link>
        <description>Learning Rate

Overviews

	*  Choosing the learning rate
		*  Blog Post: Setting the learning rate of your neural network

	*  Learning rate schedules
		*  Blog: Learning Rate Schedules Warning: blog post - may contain errors or conceptual misunderstandings.


Learning Rate Schedule

	*  Convergence conditions: to guarantee convergence to a (local) optimum, the learning rate schedule $\alpha_t$ should satisfy $\sum_t \alpha_t = \infty$ and $\sum_t \alpha_t^2 &lt; \infty$. Common decays: $1/t$, $1/\sqrt{t}$…
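
A minimal sketch of a schedule with linear warmup followed by a $1/t$ decay (the constants are illustrative; a $1/t$ tail satisfies both conditions above):

  def lr_schedule(step, base_lr=1e-3, warmup=1000):
      # Ramps linearly up to base_lr over `warmup` steps, then decays
      # proportionally to 1/step, so sum(alpha_t) diverges while
      # sum(alpha_t^2) stays finite.
      step = max(step, 1)
      return base_lr * min(step / warmup, warmup / step)

With PyTorch, the same shape can be handed to torch.optim.lr_scheduler.LambdaLR by returning the multiplier min(step / warmup, warmup / step) instead of the absolute rate.</description>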
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_to_rank&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:learning_to_rank</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_to_rank&amp;rev=1686814574&amp;do=diff</link>
        <description>Learning to Rank

In learning to rank, the learner learns to rank a set of candidates.  It is evaluated using a ranking metric, so that the higher the desired outputs are in the list, the better.
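
For example, mean reciprocal rank (MRR), one common ranking metric; a minimal sketch where each ranked list holds 0/1 relevance labels ordered from the top-ranked candidate down (names illustrative):

  def mean_reciprocal_rank(ranked_lists):
      total = 0.0
      for labels in ranked_lists:
          for rank, rel in enumerate(labels, start=1):
              if rel:                     # reward the first relevant hit
                  total += 1.0 / rank     # the higher it ranks, the larger the reward
                  break
      return total / len(ranked_lists)

  # Relevant item is 2nd in the first list, 1st in the second: (1/2 + 1) / 2
  print(mean_reciprocal_rank([[0, 1, 0], [1, 0, 0]]))   # 0.75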

Overviews

	*  Wikipedia - Learning to Rank

Related Pages

	*  Information Retrieval
	*  Recommender Systems</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_with_noise&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:learning_with_noise</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:learning_with_noise&amp;rev=1686814574&amp;do=diff</link>
        <description>Learning with Noise

Overviews

	*  2014 - Classification in the Presence of Label Noise: A Survey
	*  2019 - Image Classification with Deep Learning in the Presence of Noisy Labels: A Survey
	*  2020 - Deep learning with noisy labels: exploring techniques and remedies in medical image analysis
	*  Song et al 2020 - Learning from Noisy Labels with Deep Neural Networks: A Survey

Papers

	*  Natarajan et al 2013 - Learning with Noisy Labels
	*  Patrini et al 2017 - Making deep neural networks rob…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:log_linear_models&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:log_linear_models</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:log_linear_models&amp;rev=1686814574&amp;do=diff</link>
        <description>Log-Linear Models

Log-linear models are also known as MaxEnt models, logistic regression, or (in the structured case) conditional random fields (CRFs).
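
Concretely, a log-linear model scores each candidate output with a weighted feature sum and normalizes with a softmax; a minimal sketch (the feature function, weights, and label set are illustrative):

  import math

  def p_y_given_x(x, y, weights, feature_fn, labels):
      # p(y|x) = exp(w . f(x, y)) / sum over y' of exp(w . f(x, y'))
      def score(cand):
          return sum(w * f for w, f in zip(weights, feature_fn(x, cand)))
      z = sum(math.exp(score(cand)) for cand in labels)
      return math.exp(score(y)) / z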

Tutorials and Introductions

	*  NLP 202 Jan 5-19
	*  Smith 2004 - Log-Linear Models</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:loss_functions&amp;rev=1721694759&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-07-23T00:32:39+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:loss_functions</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:loss_functions&amp;rev=1721694759&amp;do=diff</link>
        <description>Loss Functions

A function that is minimized during training (using gradient descent or Adam, for example) is called a loss function.

Code Examples

	*  Hugging Face
		*  Custom loss in Hugging Face trainer: Trainer
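
A minimal self-contained sketch of cross-entropy (the first loss in the list below), computed with the built-in and written out by hand; the shapes are illustrative:

  import torch
  import torch.nn.functional as F

  logits = torch.randn(4, 10)            # batch of 4, 10 classes
  labels = torch.randint(0, 10, (4,))

  loss_builtin = F.cross_entropy(logits, labels)

  # The same loss written out: -mean over i of log p(y_i | x_i).
  log_probs = F.log_softmax(logits, dim=-1)
  loss_manual = -log_probs[torch.arange(4), labels].mean()

  assert torch.allclose(loss_builtin, loss_manual)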


List of Loss Functions

	*  Cross-entropy (aka log loss, conditional log-likelihood, CRF loss): $L(\mathcal{D}) = -\sum_{i=1}^{N} \log p(y_i|x_i)$ where $p(y|x) = \frac{e^{score(x,y)}}{\sum_{y'} e^{score(x,y')}}$…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mechanistic_interpretability&amp;rev=1748863421&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-02T11:23:41+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:mechanistic_interpretability</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mechanistic_interpretability&amp;rev=1748863421&amp;do=diff</link>
        <description>Mechanistic Interpretability

Mechanistic interpretability research has been done in NLP before the term was invented, under other names.  See Mechanistic? for important historical context.

Overviews

	*  Bereska &amp; Gavves 2024 - Mechanistic Interpretability for AI Safety - A Review
	*  Rai et al 2024 - A Practical Review of Mechanistic Interpretability for Transformer-Based Language Models paper list: github
	*  Sharkey et al 2025 - Open Problems in Mechanistic Interpretability
	*  Lin et al 20…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:membership_inference&amp;rev=1772922257&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-03-07T22:24:17+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:membership_inference</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:membership_inference&amp;rev=1772922257&amp;do=diff</link>
        <description>Membership Inference

Overviews

	*  Hu et al 2021 - Membership Inference Attacks on Machine Learning: A Survey

In NLP

	*  In LLMs
		*  Song &amp; Shmatikov 2018 - Auditing Data Provenance in Text-Generation Models Membership inference attack for language models
		*  Shi et al 2023 - Detecting Pretraining Data from Large Language Models
		*  Duan et al 2024 - Do Membership Inference Attacks Work on Large Language Models?
		*  Kassem et al 2024 -  Alpaca against Vicuna: Using LLMs to Uncover Memori…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:meta-learning&amp;rev=1699559084&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-11-09T19:44:44+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:meta-learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:meta-learning&amp;rev=1699559084&amp;do=diff</link>
        <description>Meta-Learning

Overviews

	*  Hospedales et al 2020 - Meta-Learning in Neural Networks: A Survey

AutoML

	*  2019 - AutoML: A Survey of the State-of-the-Art
	*  2019 - Benchmark and Survey of Automated Machine Learning Frameworks
	*  2020 - Automated Machine Learning: The New Wave of Machine Learning

Deep Learning Papers

	*  Hochreiter &amp; Younger 2001 - Learning to Learn Using Gradient Descent Amazing paper from one of the inventors of LSTMs.  Jürgen talks about it here.
	*  Andrychowicz et a…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:miscellaneous_neural_networks&amp;rev=1709864041&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-03-08T02:14:01+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:miscellaneous_neural_networks</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:miscellaneous_neural_networks&amp;rev=1709864041&amp;do=diff</link>
        <description>Miscellaneous Neural Networks

Convex Neural Networks

	*  Amos et al 2016 - Input Convex Neural Networks
	*  Sivaprasad et al 2020 - The Curious Case of Convex Neural Networks

Hopfield Networks

	*  Ramsauer et al 2020 - Hopfield Networks is All You Need

Related Pages

	*  Infinite Neural Networks
	*  Neural Module Networks</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mixture_of_expert_models&amp;rev=1748677248&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-31T07:40:48+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:mixture_of_expert_models</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:mixture_of_expert_models&amp;rev=1748677248&amp;do=diff</link>
        <description>Mixture of Expert (MoE) Models

Mixture of expert (MoE) models, focusing on sparse MoE models.
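
The core of a sparse MoE layer is a gating network that routes each input to its top-k experts; a minimal PyTorch sketch (the expert shape, count, and k are illustrative, not from any particular paper):

  import torch
  import torch.nn as nn

  class SparseMoE(nn.Module):
      def __init__(self, dim, num_experts=8, k=2):
          super().__init__()
          self.gate = nn.Linear(dim, num_experts)
          self.experts = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_experts))
          self.k = k

      def forward(self, x):                    # x: (batch, dim)
          scores, idx = self.gate(x).topk(self.k, dim=-1)
          weights = scores.softmax(dim=-1)     # normalize over the chosen experts
          out = torch.zeros_like(x)
          for slot in range(self.k):           # plain loops for clarity, not speed
              for e, expert in enumerate(self.experts):
                  mask = idx[:, slot] == e     # inputs routed to expert e in this slot
                  if mask.any():
                      out[mask] += weights[mask, slot, None] * expert(x[mask])
          return out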

Overviews

	*  Fedus et al 2022 - A Review of Sparse Expert Models in Deep Learning
	*  For LLMs
		*  Cai et al 2024 - A Survey on Mixture of Experts (Focuses on LLMs)


Foundational and Early Papers

	*  Shazeer et al 2017 - Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer

MoE Large Language Models

	*  Fedus et al 2021 - Switch Transformers: Scaling to Trillion Parame…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_glossary&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:ml_glossary</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_glossary&amp;rev=1686814574&amp;do=diff</link>
        <description>Machine Learning Glossary

Glossary of machine learning terms and their definitions.  This aims to be a list of terms found at conferences like NeurIPS (see for example the list of 2020 papers).

	*  Bandit
	*  Deep InfoMax
	*  Covariate Shift
	*  Determinantal Point Process</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_outline&amp;rev=1765914042&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-12-16T19:40:42+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:ml_outline</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_outline&amp;rev=1765914042&amp;do=diff</link>
        <description>Outline of Machine Learning

See also Outline of NLP and Overview of ML

	*  ML Glossary
	*  Applications
		*  Automatic Theorem Proving
		*  Application: Optimization
		*  Computer Use Agents

	*  Traditional ML Topics, see Overview of ML
		*  Bayesian Methods
		*  Classification
		*  Clustering
		*  Conditional Random Field
		*  Decision Trees
		*  EM Algorithm
		*  Ensembling
		*  Graphical Models
		*  Large-Scale ML
		*  Learning to Rank
		*  Log-Linear Models
		*  Online Learning
		*  Reinf…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_overview&amp;rev=1732744618&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-11-27T21:56:58+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:ml_overview</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:ml_overview&amp;rev=1732744618&amp;do=diff</link>
        <description>Machine Learning Overview

This page is a concise overview of topics in machine learning, with links to readings and other learning materials.  Roughly, these topics are the union of topics covered in various ML books and courses.

This is a resource to help you get up to speed in various topics if you're trying to learn ML on your own or broaden your ML knowledge.</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_compression&amp;rev=1747040456&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-12T09:00:56+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:model_compression</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_compression&amp;rev=1747040456&amp;do=diff</link>
        <description>Model Compression

See also Sparsity in Neural Networks.

Overviews

	*  General
		*  Cheng et al 2017 - A Survey of Model Compression and Acceleration for Deep Neural Networks
		*  Hoefler et al 2021 - Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural networks
		*  2023 - Model Compression for Deep Neural Networks: A Survey
		*  See chapter 4 of 2023 - Efficient Neural Networks for Tiny Machine Learning: A Comprehensive Review

	*  LLMs and Transformer…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_editing_and_unlearning&amp;rev=1751872429&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-07T07:13:49+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:model_editing_and_unlearning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_editing_and_unlearning&amp;rev=1751872429&amp;do=diff</link>
        <description>Model Editing and Unlearning

Model editing is where a model, such as a large language model, is “edited” to change the facts stored in the model.  Machine unlearning is where a trained model is adjusted to “remove” one or more datapoints that were used to train it, so that it behaves like a model trained without those datapoints.  The datapoints to remove can be either specific training examples or whole classes of datapoints, such as all datapoints about bioweapons.
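
A minimal sketch of one common unlearning baseline, gradient ascent on the forget set (illustrative only; assumes a Hugging Face-style model whose output carries a .loss):

  def unlearning_step(model, forget_batch, optimizer):
      # Gradient *ascent*: push the loss on the forget set up instead of down.
      # A simple baseline, not a complete unlearning method.
      loss = model(**forget_batch).loss
      (-loss).backward()
      optimizer.step()
      optimizer.zero_grad()</description>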
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_selection&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:model_selection</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:model_selection&amp;rev=1686814574&amp;do=diff</link>
        <description>Model Selection

Using held-out dev data is the best way to do model selection.  See &lt;http://www.econ.upf.edu/~lugosi/esaimsurvey.pdf&gt;:
“But whatever promising the pre-testing method may look like, it will be hard to convince practitioners to abandon cross-validation and other resampling methods. Indeed, a straightforward analysis of the hold-out approach to model selection suggests that hold-out enjoys almost all the desirable features of any foreseeable model selection method.”
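
A minimal sketch of hold-out selection itself (all names illustrative): fit every candidate on the training split and keep the one with the best dev score.

  def select_model(candidates, train, dev, fit, dev_score):
      fitted = [fit(cand, train) for cand in candidates]
      return max(fitted, key=lambda model: dev_score(model, dev))</description>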
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:modularity&amp;rev=1709864194&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-03-08T02:16:34+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:modularity</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:modularity&amp;rev=1709864194&amp;do=diff</link>
        <description>Modularity (in Neural Networks)

Overviews

	*  Pfeiffer et al 2023 - Modular Deep Learning

Modularity Papers

	*  Zhang et al 2023 - Emergent Modularity in Pre-trained Transformers

Neural Module Networks

	*  Andreas et al 2016 - Neural Module Networks
	*  Andreas et al 2016 - Learning to Compose Neural Networks for Question Answering Video NAACL 2016 best paper

People

	*  Jacob Andreas (Neural Module Networks)

Related Pages

	*  Neurosymbolic Methods</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:multi-task_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:multi-task_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:multi-task_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Multi-Task Learning

Papers

In NLP

	*  Kaiser et al 2017 - One Model To Learn Them All Almost the same authors as the Transformer (submitted four days after the Transformer).  Interesting that they didn't use the Transformer.  Perhaps they got the inspiration from this work.
	*  DecaNLP: McCann et al 2018 - The Natural Language Decathlon: Multitask Learning as Question Answering slides dataset

Datasets</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_architecture_search&amp;rev=1746434615&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-05T08:43:35+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:neural_architecture_search</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_architecture_search&amp;rev=1746434615&amp;do=diff</link>
        <description>Neural Architecture Search

Overviews

	*  White et al 2023 - Neural Architecture Search: Insights from 1000 Papers

Papers

	*  Zoph &amp; Le 2016 - Neural Architecture Search with Reinforcement Learning
	*  Kaiser et al 2017 - One Model To Learn Them All
	*  ENAS: Pham et al 2018 - Efficient Neural Architecture Search via Parameter Sharing
	*  DARTS: Liu et al 2018 - DARTS: Differentiable Architecture Search
	*  So et al 2019 - The Evolved Transformer
	*  Serianni &amp; Kalita 2023 - Training-free Neu…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_networks&amp;rev=1744090293&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-04-08T05:31:33+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:neural_networks</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_networks&amp;rev=1744090293&amp;do=diff</link>
        <description>Neural Networks

Taxonomy of Neural Networks

	*  Feedforward Neural Networks
	*  Recurrent Neural Networks
	*  Auto-encoders
	*  Boltzmann Machines, Restricted Boltzmann Machines (RBMs)
	*  Hopfield Networks

Representational Power

	*  Neural networks can represent arbitrary CNF and DNF boolean functions: see
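
For intuition, one hidden layer of threshold units suffices for any DNF: one hidden unit per conjunction, plus an output unit for the OR. A minimal sketch for the (arbitrarily chosen) formula (x1 AND NOT x2) OR x3:

  def step(z):                   # threshold unit
      return 1 if z &gt;= 0 else 0

  def dnf(x1, x2, x3):
      h1 = step(x1 - x2 - 1)     # fires iff x1=1 and x2=0
      h2 = step(x3 - 1)          # fires iff x3=1
      return step(h1 + h2 - 1)   # OR of the hidden units

  assert dnf(1, 0, 0) == 1 and dnf(0, 1, 0) == 0 and dnf(0, 0, 1) == 1</description>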
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_network_psychology&amp;rev=1743035611&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-27T00:33:31+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:neural_network_psychology</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:neural_network_psychology&amp;rev=1743035611&amp;do=diff</link>
        <description>Neural Network Psychology

“Neural network psychology” is the study of what neural networks learn and why they make the predictions they do.  See below for examples.

Papers

	*  Shi et al 2016 - Why Neural Translations are the Right Length
	*  Shi et al 2016 - Does String-Based Neural MT Learn Source Syntax?
	*  Linzen et al 2016 - Assessing the Ability of LSTMs to Learn Syntax-Sensitive Dependencies
	*  Tenney et al 2019 - What do you learn from context? Probing for sentence structure in conte…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_architectures&amp;rev=1742888085&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-25T07:34:45+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:nn_architectures</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_architectures&amp;rev=1742888085&amp;do=diff</link>
        <description>Neural Network Architectures

Overviews

	*  Yu et al 2019 - A Review of Recurrent Neural Networks: LSTM Cells and Network Architectures
	*  Wu et al 2019 - A Comprehensive Survey on Graph Neural Networks
	*  Narang et al 2021 - Do Transformer Modifications Transfer Across Implementations and Applications? Comparison of many Transformer model variants

Feedforward Networks

	*  Highway networks
	*  GLU (also considered a kind of activation, but it's more like a FF architecture).  Variants: Shaze…
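
For reference, the basic GLU computes an elementwise product of a linear projection with a sigmoid gate (the variants referenced above swap the sigmoid for other activations); a minimal PyTorch sketch:

  import torch
  import torch.nn as nn

  class GLU(nn.Module):
      # GLU(x) = (W x) * sigmoid(V x), elementwise.
      def __init__(self, d_in, d_out):
          super().__init__()
          self.w = nn.Linear(d_in, d_out)
          self.v = nn.Linear(d_in, d_out)

      def forward(self, x):
          return self.w(x) * torch.sigmoid(self.v(x))</description>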
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_initialization&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:nn_initialization</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_initialization&amp;rev=1686814574&amp;do=diff</link>
        <description>Neural Network Initialization

Overviews

	*  Blog post: How to initialize deep neural networks? Xavier and Kaiming initialization
	*  Section 8.4 in Deep Learning Book Ch 8
	*  Initialization section in Chapter 11 of Hands-on machine learning with Scikit-Learn, Keras, and TensorFlow (UCSC login required)
	*  NLP 202 Winter 2022 slides

Papers

	*  Glorot (Xavier) initialization: Glorot &amp; Bengio 2010 - Understanding the Difficulty of Training Deep Feedforward Neural Networks (use with sigmoid ac…
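
For reference, the Glorot/Xavier uniform rule draws weights with variance 2 / (fan_in + fan_out); a minimal sketch (PyTorch ships this as nn.init.xavier_uniform_):

  import math
  import torch

  def xavier_uniform(fan_in, fan_out):
      # U(-a, a) with a = sqrt(6 / (fan_in + fan_out)) has
      # variance a^2 / 3 = 2 / (fan_in + fan_out).
      a = math.sqrt(6.0 / (fan_in + fan_out))
      return torch.empty(fan_out, fan_in).uniform_(-a, a)</description>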
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_sparsity&amp;rev=1746640793&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-07T17:59:53+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:nn_sparsity</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_sparsity&amp;rev=1746640793&amp;do=diff</link>
        <description>Sparsity in Neural Networks

Overviews

	*  Hoefler et al 2021 - Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural networks

Papers

	*  Zhou et al 2021 - Learning N:M Fine-grained Structured Sparse Neural Networks From Scratch
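
For intuition, N:M structured sparsity keeps at most N nonzero weights in every group of M consecutive weights; a minimal magnitude-pruning sketch (assumes the weight count is divisible by m):

  import torch

  def nm_prune(w, n=2, m=4):
      # Keep the n largest-magnitude weights in each group of m, zero the rest.
      groups = w.reshape(-1, m)
      idx = groups.abs().topk(n, dim=-1).indices
      mask = torch.zeros_like(groups)
      mask.scatter_(1, idx, 1.0)
      return (groups * mask).reshape(w.shape)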

Related Pages

	*  Model Compression
	*  Pruning &amp; Sparsification
	*  Sparse Autoencoders</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_training&amp;rev=1720564168&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-07-09T22:29:28+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:nn_training</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_training&amp;rev=1720564168&amp;do=diff</link>
        <description>Neural Network Training

Overviews

	*  Chapter 11 of Hands-on machine learning with Scikit-Learn, Keras, and TensorFlow (UCSC login required)  Excellent introduction to training neural networks
	*  Deep Learning Chapter 8: Training Deep Models
	*  Smith 2018 - A Disciplined Approach to Neural Network Hyper-parameters Leslie's opinion, but it has some good insights
	*  Karpathy 2019 - A Recipe for Training Neural Networks Good advice, especially looking at the data
	*  NLP 202 Winter 2022 - Training…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_tricks&amp;rev=1697062740&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-10-11T22:19:00+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:nn_tricks</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:nn_tricks&amp;rev=1697062740&amp;do=diff</link>
        <description>Neural Network Tricks

Overviews

	*  NLP 202 lecture: Training Deep Neural Networks (Winter 2022)

	*  Training Tricks (see NN Training)
		*  Initialization
		*  Normalization
		*  Learning Rate Schedule
		*  Gradient clipping Pascanu et al 2012
		*  Scheduled Sampling
		*  Curriculum Learning
		*  Overcoming Catastrophic Forgetting
		*  Adjust the batch size, or use gradient accumulation (see this blog, for example, and the sketch below) to simulate larger batch sizes
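
A minimal sketch of gradient accumulation in PyTorch (assumes model, criterion, optimizer, and loader are already defined; k is illustrative):

  k = 8  # accumulation steps: effective batch = k * micro-batch size
  optimizer.zero_grad()
  for i, (x, y) in enumerate(loader):
      loss = criterion(model(x), y) / k   # scale so the accumulated grads average
      loss.backward()                     # gradients add up in .grad across calls
      if (i + 1) % k == 0:
          optimizer.step()
          optimizer.zero_grad()</description>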
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:normalization&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:normalization</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:normalization&amp;rev=1686814574&amp;do=diff</link>
        <description>Normalization

Normalization can improve the optimizer's ability to train a neural network.  There are two main categories of normalization procedures: activation normalization and weight normalization (Shen 2020).
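
As a concrete example of activation normalization, layer normalization standardizes each feature vector and then rescales it; a minimal sketch equivalent to torch.nn.LayerNorm:

  import torch

  def layer_norm(x, gamma, beta, eps=1e-5):
      # Standardize over the last (feature) dimension, then rescale.
      mean = x.mean(dim=-1, keepdim=True)
      var = x.var(dim=-1, keepdim=True, unbiased=False)
      return gamma * (x - mean) / torch.sqrt(var + eps) + beta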

Overviews

	*  Blog post: 2019 - Normalization Techniques in Deep Neural Networks

Activation Normalization Schemes</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization&amp;rev=1709762240&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-03-06T21:57:20+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:optimization</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization&amp;rev=1709762240&amp;do=diff</link>
        <description>Optimization

Lectures and Books

	*  Duchi - Introductory Lectures on Stochastic Optimization
	*  Bottou et al 2016 - Optimization Methods for Large-Scale Machine Learning Survey paper that includes theory results with proofs (even includes the rate of convergence of SGD on non-convex objectives)
	*  Black-box Optimization
	*  Boyd &amp; Vandenberghe - Convex Optimization
	*  Luenberger - Linear and Nonlinear Programming Great book

Theory

	*  Agarwal, Leon Bottou 2015 - A Lower Bound for the Opti…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization_in_deep_learning&amp;rev=1742863782&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-25T00:49:42+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:optimization_in_deep_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimization_in_deep_learning&amp;rev=1742863782&amp;do=diff</link>
        <description>Optimization Topics in Deep Learning

Effects on Optimization

	*  Batch normalization
		*  Makes the objective function and gradients smoother (more Lipschitz) (Santurkar 2018 - How Does Batch Normalization Help Optimization?. Another perspective: see section 3 of De 2020.)

	*  Weight normalization
		*  Improves the conditioning of the optimization problem (</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimizers&amp;rev=1743019366&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-03-26T20:02:46+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:optimizers</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:optimizers&amp;rev=1743019366&amp;do=diff</link>
        <description>Optimizers

Survey Papers

	*  Introduction: Ruder 2016 - An Overview of Gradient Descent Optimization Algorithms blog post
	*  Overviews
		*  Bottou et al 2016 - Optimization Methods for Large-Scale Machine Learning
		*  Sun et al 2019 - A Survey of Optimization Methods from a Machine Learning Perspective Very good
		*  Kashyap 2022 - A survey of deep learning optimizers - first and second order methods

	*  Book Chapters
		*  Deep Learning Chapter 8: Training Deep Models

	*  Blog posts
		*  O…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:privacy&amp;rev=1737484903&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-01-21T18:41:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:privacy</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:privacy&amp;rev=1737484903&amp;do=diff</link>
        <description>Privacy in Machine Learning

Overviews

	*  Mireshghallah et al 2020 - Privacy in Deep Learning: A Survey

Related Pages

	*  Copyright Issues
	*  Extracting Knowledge from Language Models
	*  Membership Inference</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:probabilistic_logic&amp;rev=1688604445&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-07-06T00:47:25+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:probabilistic_logic</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:probabilistic_logic&amp;rev=1688604445&amp;do=diff</link>
        <description>Probabilistic Logic

	*  Probabilistic Soft Logic

Also neural + logic

Papers

	*  Applications
		*  Pryor et al 2023 - Using Commonsense to Guide Dialog Structure Induction via Neural Probabilistic Soft Logic (At ACL 2023)


People

	*  Lise Getoor

Related Pages

	*  Neurosymbolic Methods</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:program_induction&amp;rev=1702683190&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-12-15T23:33:10+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:program_induction</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:program_induction&amp;rev=1702683190&amp;do=diff</link>
        <description>Program Induction

	*  Gaunt 2016 - TerpreT: A Probabilistic Programming Language for Program Induction “The inference task is to observe a set of input-output examples and infer the underlying program... (we) automatically perform inference using four different back-ends that include machine learning and program synthesis approaches. These are based on gradient descent (thus each specification can be seen as a differentiable interpreter), linear program (LP) relaxations for graphical models, di…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:pytorch&amp;rev=1699564213&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-11-09T21:10:13+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:pytorch</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:pytorch&amp;rev=1699564213&amp;do=diff</link>
        <description>PyTorch

	*  Tutorials
		*  &lt;https://pytorch.org/tutorials/&gt;
		*  &lt;https://pytorch.org/tutorials/beginner/nn_tutorial.html&gt;
		*  &lt;https://pytorch.org/tutorials/recipes/recipes/loading_data_recipe.html&gt; (DataLoader)
		*  &lt;https://pytorch.org/tutorials/beginner/data_loading_tutorial.html&gt;
		*  &lt;https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html&gt;
		*  &lt;https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html&gt; Good seq2seq tutorial and starter code (does…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:quantum_machine_learning&amp;rev=1759817717&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-10-07T06:15:17+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:quantum_machine_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:quantum_machine_learning&amp;rev=1759817717&amp;do=diff</link>
        <description>Quantum Computing and Quantum Machine Learning

Quantum machine learning refers to machine learning algorithms designed to run on quantum computers.

Overviews

	*  García et al 2022 - Systematic Literature Review: Quantum Machine Learning and its Applications
	*  Quantum Algorithms for Data Analysis Book Ch 6 - Quantum Perceptron

Papers

	*  Quantum Machine Learning
		*  Wiebe et al 2016 - Quantum Perceptron Models
		*  Tacchino et al 2019 - An artificial neuron implemented on an actual quantum processor pdf
		*  W…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:regularization&amp;rev=1710401906&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-03-14T07:38:26+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:regularization</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:regularization&amp;rev=1710401906&amp;do=diff</link>
        <description>Regularization

Regularization in Deep Learning

Dropout

	*  Dropout: Hinton et al 2012 - Improving Neural Networks by Preventing Co-adaptation of Feature Detectors
	*  DropConnect: Wang et al 2013 - Regularization of Neural Networks using DropConnect
	*  Wang &amp; Manning 2013 - Fast dropout training Shows that dropout is an approximation to an objective, and directly optimizes a fast approximation to this objective. “We show how to do fast dropout training by sampling from or integrating a Gauss…
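
For reference, a minimal sketch of inverted dropout as applied at training time (at test time the layer is the identity):

  import torch

  def dropout(x, p=0.5, training=True):
      if not training or p == 0.0:
          return x
      # Keep each unit with probability 1-p and scale survivors by 1/(1-p),
      # so the expected activation is unchanged.
      mask = torch.bernoulli(torch.full_like(x, 1.0 - p))
      return x * mask / (1.0 - p)</description>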
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:reinforcement_learning&amp;rev=1752471605&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-14T05:40:05+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:reinforcement_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:reinforcement_learning&amp;rev=1752471605&amp;do=diff</link>
        <description>Reinforcement Learning

Overviews

	*  Blogs and Tutorials
		*  OpenAI Intro to RL Good intro to RL, with emphasis on deep learning methods

	*  Books and Chapters
		*  Chapter 18 - Reinforcement Learning (UCSC only) from Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow 2nd Ed.  Good, concise introduction.

	*  Lectures and Slides
		*  Lecture 14: Reinforcement Learning

	*  Overview papers
		*  Levine et al 2020 - Offline Reinforcement Learning: Tutorial, Review, and Perspecti…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:sampling&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:sampling</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:sampling&amp;rev=1686814574&amp;do=diff</link>
        <description>Sampling

Examples of Sampling in NLP and ML

	*  Dropout is an approximation: Wang &amp; Manning 2013 - Fast Dropout Training
	*  Negative Sampling: Goldberg &amp; Levy 2014 - word2vec Explained: Deriving Mikolov et al.’s Negative-Sampling Word-Embedding Method
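
For reference, the negative-sampling objective for one (word, context) pair with k sampled negatives, as derived by Goldberg &amp; Levy; a minimal PyTorch sketch (vector names illustrative):

  import torch.nn.functional as F

  def neg_sampling_loss(w, c_pos, c_negs):
      # Maximize log sigmoid(w . c_pos) plus, for each negative context,
      # log sigmoid(-w . c_neg); return the negated objective to minimize.
      pos = F.logsigmoid(w @ c_pos)            # w, c_pos: (dim,)
      neg = F.logsigmoid(-(c_negs @ w)).sum()  # c_negs: (k, dim)
      return -(pos + neg)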

Related Pages

See also Statistics - Sampling.</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:scaling_laws&amp;rev=1748819368&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-01T23:09:28+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:scaling_laws</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:scaling_laws&amp;rev=1748819368&amp;do=diff</link>
        <description>Scaling Laws

Scaling laws are used to pick optimal hyperparameters for large models.
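
For example, a saturating power law $L(N) = aN^{-b} + c$ can be fit to (model size, loss) pairs and then extrapolated; a minimal sketch with illustrative numbers (not real measurements):

  import numpy as np
  from scipy.optimize import curve_fit

  def power_law(n, a, b, c):
      return a * n ** (-b) + c

  sizes = np.array([1e6, 1e7, 1e8, 1e9])    # parameter counts (illustrative)
  losses = np.array([4.2, 3.5, 3.0, 2.7])   # observed losses (illustrative)
  (a, b, c), _ = curve_fit(power_law, sizes, losses, p0=[10.0, 0.1, 2.0])
  print(power_law(1e10, a, b, c))           # predicted loss at 10B parameters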

Papers

	*  Kaplan et al 2020 - Scaling Laws for Neural Language Models
	*  Hoffmann et al 2022 - Training Compute-Optimal Large Language Models
	*  Tay et al 2022 - Scaling Laws vs Model Architectures: How Does Inductive Bias Influence Scaling?
	*  Ruan et al 2024 - Observational Scaling Laws and the Predictability of Language Model Performance Does a multi-dimensional regression (fitting a sigmoid) to predic…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-play&amp;rev=1686941992&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-16T18:59:52+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:self-play</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-play&amp;rev=1686941992&amp;do=diff</link>
        <description>Self-Play and AI Feedback

Self-play is where a model improves by interacting with itself, for example by playing a game against itself to get better at that game.

Papers

	*  AlphaGo: Silver et al 2016 - Mastering The Game of Go with Deep Neural Networks and Tree Search

NLP Papers

	*  Fu et al 2023 - Improving Language Model Negotiation with Self-Play and In-Context Learning from AI Feedback

Related Pages

	*  GANs
	*  Human-in-the-Loop
	*  Reinforcement Learning</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-supervised_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:self-supervised_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:self-supervised_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Self-Supervised Learning

Related Pages

	*  BERT and Friends
	*  Contrastive Learning
	*  Semi-supervised Learning</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:semi-supervised_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:semi-supervised_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:semi-supervised_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Semi-Supervised Learning

Overviews

	*  Yang et al 2021 - A Survey on Deep Semi-supervised Learning

Papers

	*  NoisyStudent: Xie et al 2019 - Self-training with Noisy Student improves ImageNet classification Works even with lots of labeled data. Used in Zhang 2022

Related Pages

	*  Data Augmentation
	*  Domain Adaptation</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:software&amp;rev=1760031629&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-10-09T17:40:29+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:software</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:software&amp;rev=1760031629&amp;do=diff</link>
        <description>Software

See also this list of ML related software.

Deep Learning

Current, Recommended Frameworks

	*  Pytorch
		*  Huggingface
		*  LLMs
			*  Unsloth github Incredibly fast. As of March 2025, this is the fastest single-GPU framework for LLMs
			*  llama.cpp
			*  vLLM

		*  Multi-GPU training</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:state-space_models&amp;rev=1755885492&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-08-22T17:58:12+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:state-space_models</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:state-space_models&amp;rev=1755885492&amp;do=diff</link>
        <description>State-Space Models

Overviews

	*  Survey Papers
		*  2024 - Mamba-360: Survey of State Space Models as Transformer Alternative for Long Sequence Modeling: Methods, Applications, and Challenges

	*  Papers with Good Overviews
		*  Gu et al 2020 - HiPPO: Recurrent Memory with Optimal Polynomial Projections
		*  S4 model: Gu et al 2021 - Efficiently Modeling Long Sequences with Structured State Spaces Good intro to state spaces
		*  Orvieto et al 2023 - Resurrecting Recurrent Neural Networks for L…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:support_vector_machines&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:support_vector_machines</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:support_vector_machines&amp;rev=1686814574&amp;do=diff</link>
        <description>Support Vector Machines

Papers

	*  Optimizing SVMs in the primal:
		*  Chapelle 2006 - Training a Support Vector Machine in the Primal
		*  Ratliff et al 2007 - (Online) Subgradient Methods for Structured Prediction

	*  The idea for training neural networks with SVM loss came from Ronan Collobert's 2004 PhD Thesis and this paper: Collobert 2004, which has some conceptual errors (mainly conflating SVM loss with Perceptron loss).
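
The distinction is simple to state: hinge (SVM) loss penalizes margin violations, while perceptron loss only penalizes misclassifications; a minimal sketch:

  def hinge_loss(score, y):        # y in {-1, +1}; penalizes margins below 1
      return max(0.0, 1.0 - y * score)

  def perceptron_loss(score, y):   # zero whenever the sign is already correct
      return max(0.0, -y * score)

  # Correct but inside the margin: hinge penalizes, perceptron does not.
  print(hinge_loss(0.5, 1), perceptron_loss(0.5, 1))   # 0.5 0.0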

Structured SVM (SSVM)</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:systems_ml&amp;rev=1752819997&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-18T06:26:37+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:systems_ml</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:systems_ml&amp;rev=1752819997&amp;do=diff</link>
        <description>Systems &amp; ML

Papers related to systems (making things efficient) and machine learning research.

Papers

	*  2024 - InferCept: Efficient Intercept Support for Augmented Large Language Model Inference
	*  Horton et al 2024 - KV Prediction for Improved Time to First Token

Conferences and Workshops

	*  MLSys
	*  Efficient Systems for Foundation Models (Workshop)
	*  International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) (Some ML systems papers …</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:tensorflow&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:tensorflow</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:tensorflow&amp;rev=1686814574&amp;do=diff</link>
        <description>TensorFlow

Debugging

	*  Printing values (during training, etc).
		*  Use tf.print() instead of print().  See here.
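
For example, tf.print executes on every call inside a compiled tf.function, whereas Python's print only runs once, at trace time; a minimal sketch:

  import tensorflow as tf

  @tf.function
  def train_step(x):
      y = x * 2.0
      tf.print("y =", y)   # runs every call, even in graph mode
      return y

  train_step(tf.constant(3.0))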


Profiling

	*  Tensorflow - Optimize TensorFlow GPU Performance with the TensorFlow Profiler

Operations

	*  Cumulative sum and max
		*  Cumulative sum: tf.math.cumsum
		*  Cumulative max: Stack Overflow or tf_extended


Related Pages

	*  Hugging Face
	*  PyTorch
	*</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:theory&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:theory</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:theory&amp;rev=1686814574&amp;do=diff</link>
        <description>Machine Learning: Theory

Overviews and Books

Misc Papers

Historical Theory Papers

See History of ML - Theory

Related Pages

	*  Binary Classification
	*  Generalization in Deep Learning
	*  History of ML - Theory Contains early theory papers
	*  Multi-Armed Bandit
	*  Learning Curves
	*  Regret Bounds
	*  Reinforcement Learning</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:trustworthy_ai&amp;rev=1748625055&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-30T17:10:55+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:trustworthy_ai</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:trustworthy_ai&amp;rev=1748625055&amp;do=diff</link>
        <description>Trustworthy AI

Overviews

	*  Liu et al 2021 - Trustworthy AI: A Computational Perspective
	*  Li et al 2021 - Trustworthy AI: From Principles to Practices
	*  Kaur et al 2022 - Trustworthy Artificial Intelligence: A Review
	*  Wu et al 2023 - Survey of Trustworthy AI: A Meta Decision of AI
	*  LLMs
		*  Liu et al 2023 - Trustworthy LLMs: a Survey and Guideline for Evaluating Large Language Models' Alignment


Papers

	*  Jacovi et al 2020 - Formalizing Trust in Artificial Intelligence: Prerequ…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:visualizing_neural_networks&amp;rev=1719547132&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-06-28T03:58:52+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:visualizing_neural_networks</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:visualizing_neural_networks&amp;rev=1719547132&amp;do=diff</link>
        <description>Visualizing Neural Networks

	*  Karpathy et al 2015 - Visualizing and Understanding Recurrent Networks
	*  Strobelt et al 2016 - LSTMVis: A Tool for Visual Analysis of Hidden State Dynamics in Recurrent Neural Networks demo
	*  (Also the Transformer visualization demos at ACL 2020)
	*  Tenney et al 2020 - The Language Interpretability Tool: Extensible, Interactive Visualizations and Analysis for NLP Models
	*  Langedijk et al 2023 - DecoderLens: Layerwise Interpretation of Encoder-Decoder Transf…</description>
    </item>
    <item rdf:about="https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:zero-shot_learning&amp;rev=1686814574&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-06-15T07:36:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ml:zero-shot_learning</title>
        <link>https://jlab.soe.ucsc.edu/nlp-wiki/doku.php?id=ml:zero-shot_learning&amp;rev=1686814574&amp;do=diff</link>
        <description>Zero-Shot Learning

In zero-shot learning, at test time the learner is given samples from new classes that were not observed during training and must predict which new class each sample belongs to. Methods for zero-shot learning usually associate observed and non-observed classes through a form of auxiliary information, such as vector embeddings or symbolic attributes for the classes. See
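
For intuition, a common zero-shot scheme scores each unseen class by the similarity between the input embedding and the class's auxiliary embedding; a minimal PyTorch sketch (names illustrative):

  import torch
  import torch.nn.functional as F

  def zero_shot_predict(x_emb, class_embs, class_names):
      # Cosine similarity against each class's attribute/embedding vector;
      # predict the nearest class, even if it was never seen in training.
      sims = F.cosine_similarity(x_emb.unsqueeze(0), class_embs, dim=-1)
      return class_names[sims.argmax().item()]</description>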
    </item>
</rdf:RDF>
