@inproceedings{petrov-haghighi-klein:2008:EMNLP,
  author    = {Petrov, Slav and Haghighi, Aria and Klein, Dan},
  title     = {Coarse-to-Fine Syntactic Machine Translation using Language Projections},
  booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
  month     = oct,
  year      = {2008},
  address   = {Honolulu, Hawaii},
  publisher = {Association for Computational Linguistics},
  pages     = {108--116},
  url       = {http://www.aclweb.org/anthology/D08-1012},
}
The intersection of tree transducer-based translation models
with n-gram language models results in huge dynamic
programs for machine translation decoding. We propose a
multipass, coarse-to-fine approach in which the language
model complexity is incrementally introduced. In contrast
to previous *order-based* bigram-to-trigram approaches,
we focus on *encoding-based* methods, which use a
clustered encoding of the target language. Across various
hierarchical encoding schemes and for multiple language
pairs, we show speed-ups of up to 50 times over single-pass
decoding while improving BLEU score. Moreover, our entire
decoding cascade for trigram language models is faster than
the corresponding bigram pass alone of a bigram-to-trigram
decoder.
@inproceedings{petrov-klein:2008:EMNLP,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Sparse Multi-Scale Grammars for Discriminative Latent Variable Parsing},
  booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
  month     = oct,
  year      = {2008},
  address   = {Honolulu, Hawaii},
  publisher = {Association for Computational Linguistics},
  pages     = {867--876},
  url       = {http://www.aclweb.org/anthology/D08-1091},
}
We present a discriminative, latent variable approach to
syntactic parsing in which rules exist at multiple scales
of refinement. The model is formally a latent variable
CRF grammar over trees, learned by iteratively splitting
grammar productions (not categories). Different regions
of the grammar are refined to different degrees, yielding
grammars which are three orders of magnitude smaller
than the single-scale baseline and 20 times smaller than
the split-and-merge grammars of Petrov et al. 2006.
In addition, our discriminative approach integrally admits
features beyond local tree configurations. We present a
multi-scale training method along with an efficient
CKY-style dynamic program. On a variety of domains
and languages, this method produces the best published
parsing accuracies with the smallest reported grammars.
@inproceedings{favre-etal:2008:SLT,
  author    = {Favre, Benoit and Hakkani-T{\"u}r, Dilek and Petrov, Slav and Klein, Dan},
  title     = {Efficient Sentence Segmentation Using Syntactic Features},
  booktitle = {Spoken Language Technologies (SLT)},
  year      = {2008},
  address   = {Goa, India},
}
To enable downstream language processing, automatic speech
recognition output must be segmented into its individual sentences.
Previous sentence segmentation systems have typically been very
local, using low-level prosodic and lexical features to independently
decide whether or not to segment at each word boundary position.
In this work, we leverage global syntactic information from a syntactic
parser, which is better able to capture long distance dependencies.
While some previous work has included syntactic features,
ours is the first to do so in a tractable, lattice-based way, which is
crucial for scaling up to long-sentence contexts. Specifically, an initial
hypothesis lattice is constructed using local features. Candidate
sentences are then assigned syntactic language model scores. These
global syntactic scores are combined with local low-level scores in
a log-linear model. The resulting system significantly outperforms
the most popular long-span model for sentence segmentation (the
hidden event language model) on both reference text and automatic
speech recognizer output from news broadcasts.
@inproceedings{petrov-klein:2008:PaGe,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Parsing {German} with Latent Variable Grammars},
  booktitle = {Proceedings of the Workshop on Parsing German at ACL '08},
  month     = jun,
  year      = {2008},
  address   = {Columbus, Ohio},
  publisher = {Association for Computational Linguistics},
  pages     = {33--39},
  url       = {http://www.aclweb.org/anthology/W/W08/W08-1005},
}
We describe experiments on learning latent variable
grammars for various German treebanks, using a
language-agnostic statistical approach. In our method,
a minimal initial grammar is hierarchically refined
using an adaptive split-and-merge EM procedure,
giving compact, accurate grammars. The learning
procedure directly maximizes the likelihood of the
training treebank, without the use of any language
specific or linguistically constrained features.
Nonetheless, the resulting grammars encode many
linguistically interpretable patterns and give the best
published parsing accuracies on three German
treebanks.
@inproceedings{petrov-klein:2008:NIPS2008,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Discriminative Log-Linear Grammars with Latent Variables},
  booktitle = {Advances in Neural Information Processing Systems 20 (NIPS)},
  editor    = {Platt, J. C. and Koller, D. and Singer, Y. and Roweis, S.},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {1153--1160},
  year      = {2008},
  url       = {http://books.nips.cc/papers/files/nips20/NIPS2007_0630.pdf},
}
We demonstrate that log-linear grammars with latent variables can be
practically trained using discriminative methods. Central to
efficient discriminative training is a hierarchical pruning procedure
which allows feature expectations to be efficiently approximated
in a gradient-based procedure. We compare L1 and L2 regularization
and show that L1 regularization is superior, requiring fewer iterations
to converge, and yielding sparser solutions. On full-scale treebank
parsing experiments, the discriminative latent models outperform both
the comparable generative latent models as well as the discriminative
non-latent baselines.
@inproceedings{petrov-pauls-klein:2007:EMNLP-CoNLL2007,
  author    = {Petrov, Slav and Pauls, Adam and Klein, Dan},
  title     = {Learning Structured Models for Phone Recognition},
  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
  year      = {2007},
  pages     = {897--905},
  url       = {http://www.aclweb.org/anthology/D/D07/D07-1094},
}
We present a maximally streamlined approach to learning
HMM-based acoustic models for automatic speech recognition.
In our approach, an initial monophone HMM is iteratively
refined using a split-merge EM procedure which makes no
assumptions about subphone structure or context-dependent
structure, and which uses only a single Gaussian per HMM
state. Despite the much simplified training process, our
acoustic model achieves state-of-the-art results on phone
classification (where it outperforms almost all other methods) and
competitive performance on phone recognition (where it
outperforms standard CD triphone / subphone / GMM approaches).
We also present an analysis of what is and is not learned by
our system.
@inproceedings{liang-EtAl:2007:EMNLP-CoNLL2007,
  author    = {Liang, Percy and Petrov, Slav and Jordan, Michael and Klein, Dan},
  title     = {The Infinite {PCFG} Using Hierarchical {Dirichlet} Processes},
  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
  year      = {2007},
  pages     = {688--697},
  url       = {http://www.aclweb.org/anthology/D/D07/D07-1072},
}
We present a nonparametric Bayesian model
of tree structures based on the hierarchical
Dirichlet process (HDP). Our HDP-PCFG
model allows the complexity of the grammar
to grow as more training data is available.
In addition to presenting a fully Bayesian
model for the PCFG, we also develop an efficient
variational inference procedure. On
synthetic data, we recover the correct grammar
without having to specify its complexity
in advance. We also show that our techniques
can be applied to full-scale parsing
applications by demonstrating its effectiveness
in learning state-split grammars.
@inproceedings{Petrov-Klein-2007:AAAI,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Learning and Inference for Hierarchically Split {PCFG}s},
  booktitle = {AAAI 2007 (Nectar Track)},
  year      = {2007},
  url       = {http://www.eecs.berkeley.edu/~petrov/data/aaai2007.pdf},
}
Treebank parsing can be seen as the search for an optimally
refined grammar consistent with a coarse training treebank.
We describe a method in which a minimal grammar is hierarchically
refined using EM to give accurate, compact grammars.
The resulting grammars are extremely compact compared
to other high-performance parsers, yet the parser gives
the best published accuracies on several languages, as well
as the best generative parsing numbers in English. In addition,
we give an associated coarse-to-fine inference scheme
which vastly improves inference time with no loss in test set
accuracy.
@inproceedings{petrov-klein:2007:main,
  author    = {Petrov, Slav and Klein, Dan},
  title     = {Improved Inference for Unlexicalized Parsing},
  booktitle = {Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference},
  month     = apr,
  year      = {2007},
  address   = {Rochester, New York},
  publisher = {Association for Computational Linguistics},
  pages     = {404--411},
  url       = {http://www.aclweb.org/anthology/N/N07/N07-1051},
}
We present several improvements to unlexicalized
parsing with hierarchically state-split PCFGs. First,
we present a novel coarse-to-fine method in which
a grammar's own hierarchical projections are used
for incremental pruning, including a method for efficiently
computing projections of a grammar without
a treebank. In our experiments, hierarchical
pruning greatly accelerates parsing with no loss in
empirical accuracy. Second, we compare various
inference procedures for state-split PCFGs from the
standpoint of risk minimization, paying particular
attention to their practical tradeoffs. Finally, we
present multilingual experiments which show that
parsing with hierarchical state-splitting is fast and
accurate in multiple languages and domains, even
without any language-specific tuning.
@inproceedings{Petrov-EtAl:2006:TRECVID,
  author    = {Petrov, Slav and Faria, Arlo and Michaillat, Pascal and Berg, Alexander and Stolcke, Andreas and Klein, Dan and Malik, Jitendra},
  title     = {Detecting Categories in News Video using Acoustic, Speech and Image Features},
  booktitle = {Proceedings of (VIDEO) TREC (TrecVid 2006)},
  year      = {2006},
  url       = {http://www.eecs.berkeley.edu/~petrov/data/trecvid06.pdf},
}
This work describes systems for detecting semantic categories
present in news video. The multimedia data was processed in
three ways: the audio signal was converted to a sequence of
acoustic features, automatic speech recognition provided a
word-level transcription, and image features were computed for
selected frames of the video signal. Primary acoustic, speech,
and vision systems were trained to discriminate instances of
the categories. Higher-level systems exploited correlations
among the categories, incorporated sequential context, and
combined the joint evidence from the three information sources.
We present experimental results from the TREC video retrieval
evaluation.
@inproceedings{petrov-EtAl:2006:COLACL,
  author    = {Petrov, Slav and Barrett, Leon and Thibaux, Romain and Klein, Dan},
  title     = {Learning Accurate, Compact, and Interpretable Tree Annotation},
  booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2006},
  address   = {Sydney, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {433--440},
  url       = {http://www.aclweb.org/anthology/P/P06/P06-1055},
}
We present an automatic approach to tree annotation
in which basic nonterminal symbols are alternately
split and merged to maximize the likelihood
of a training treebank. Starting with a simple Xbar
grammar, we learn a new grammar whose nonterminals
are subsymbols of the original nonterminals.
In contrast with previous work, we are able
to split various terminals to different degrees, as appropriate
to the actual complexity in the data. Our
grammars automatically learn the kinds of linguistic
distinctions exhibited in previous work on manual
tree annotation. On the other hand, our grammars
are much more compact and substantially more accurate
than previous work on automatic annotation.
Despite its simplicity, our best grammar achieves
an F1 of 89.9% on the Penn Treebank, higher than
most fully lexicalized systems.
@inproceedings{petrov-barrett-klein:2006:CoNLL-X,
  author    = {Petrov, Slav and Barrett, Leon and Klein, Dan},
  title     = {Non-Local Modeling with a Mixture of {PCFG}s},
  booktitle = {Proceedings of the Tenth Conference on Computational Natural Language Learning (CoNLL-X)},
  month     = jun,
  year      = {2006},
  address   = {New York City},
  publisher = {Association for Computational Linguistics},
  pages     = {14--20},
  url       = {http://www.aclweb.org/anthology/W/W06/W06-2903},
}
While most work on parsing with PCFGs
has focused on local correlations between
tree configurations, we attempt to model
non-local correlations using a finite mixture
of PCFGs. A mixture grammar fit
with the EM algorithm shows improvement
over a single PCFG, both in parsing
accuracy and in test data likelihood. We
argue that this improvement comes from
the learning of specialized grammars that
capture non-local correlations.
@inproceedings{Tomasi-Petrov-Sastry-2003:ICCV,
  author    = {Tomasi, Carlo and Petrov, Slav and Sastry, Arvind},
  title     = {{3D} Tracking = {Classification} + {Interpolation}},
  booktitle = {Proceedings of the Ninth IEEE International Conference on Computer Vision (ICCV)},
  year      = {2003},
  url       = {http://www.eecs.berkeley.edu/~petrov/data/iccv03.pdf},
}
Hand gestures are examples of fast and complex motions.
Computers fail to track these in fast video, but sleight of
hand fools humans as well: what happens too quickly we
just cannot see. We show a 3D tracker for these types of
motions that relies on the recognition of familiar configurations
in 2D images (classification), and fills the gaps
in-between (interpolation). We illustrate this idea with experiments
on hand motions similar to finger spelling. The
penalty for a recognition failure is often small: if two configurations
are confused, they are often similar to each
other, and the illusion works well enough, for instance, to
drive a graphics animation of the moving hand. We contribute
advances in both feature design and classifier training:
our image features are invariant to image scale, translation,
and rotation, and we propose a classification method
that combines VQPCA with discrimination trees.
@mastersthesis{Petrov-Masters,
  author = {Petrov, Slav},
  title  = {Computer vision, sensor fusion, and behavior control for soccer playing robots},
  school = {Freie Universit{\"a}t Berlin},
  year   = {2004},
  url    = {http://www.eecs.berkeley.edu/~petrov/data/slav_diplom_arbeit.pdf},
}
This Master's thesis describes parts of the control software
used by the soccer robots of the Free University of Berlin,
the so-called FU-Fighters. The FU-Fighters compete in the
Middle Sized League of RoboCup and reached the semi-finals
during the 2004 RoboCup World Cup in Lisbon, Portugal. The
thesis covers several independent topics:
- Automatic White Balance: It is shown how to improve the
white balancing of an omni-directional camera by using a
reference color and a PID-controller.
- Ball Tracking: The reliable tracking of the ball is vital
in robot soccer. Therefore a Kalman-filter based system for
estimating the ball position and velocity in the presence
of occlusions is developed.
- Sensor Fusion: The robot perceives its environment through
several independent sensors (camera, odometer, etc.), which
have different delays. We propose a novel method for fusing
the sensor data and show our results through examples of
self-localization.
- Behavior Control: Finally we show how all these elements
can be incorporated into a goal keeping robot. We develop
simple behaviors that can be used in a layered architecture
and enable the robot to block most balls that are being shot
at the goal.
Unpublished Reports
- Thesis Proposal. Quals Slides.
- Kernel Fusion for Video Retrieval Tasks,
CS 281B Spring 2006 (P. Bartlett) class project with Pascal Michaillat.
[abstract]
[paper (pdf)]
[poster (pdf)]
- Transfer of Grammatical Structure,
CS 294 Fall 2005 (M. Jordan, P. Bartlett, S. Russell) class project with Leon Barrett and Romain Thibaux.
[abstract]
[paper (pdf)]
[slides (pdf)]
- Parsing with a Mixture of PCFGs,
CS 288 Spring 2005 (R. Wilensky) class project with Leon Barrett.
[abstract]
[paper (pdf)]
- Image Segmentation with Maximum Cuts,
CS 270 Spring 2005 (C. Papadimitriou) class project.
[abstract]
[paper (pdf)]
- Unsupervised Segmentation of Bilingual Text,
CS 281A Fall 2004 (M. Jordan) and CS 294 (D. Klein) class project.
[abstract]
[paper (pdf)]
[slides (pdf)]
- Landscape of Wireless Applications in the US Marketplace,
MOT, Spring 2006 (A. Isaacs) class project with John Chuang, Madeleine Moss, Tracy Olsen, and Richard Teo.
[paper (pdf)]
[slides (pdf)]
- Report on Hologic,
MOT, Spring 2006 (H. Chesbrough) class project with Janet Blumenfeld, Jinghua Luo, Daniel Mandel and Ryan White.
[paper (pdf)]
Here are some pictures from past trips.
- Mexico (Tulum, March 2009),
- Thanksgiving 2008,
- Nicaragua (November 2008),
- New York (Summer 2008),
- Brazil (Rio, Salvador, etc., May 2008),
- Colombia (Bogota and Cartagena, March 2008),
- Oktoberfest (September 2007),
- Amsterdam (September 2007),
- Vancouver (July 2007),
- Prague (June 2007),
- Roadtrip Pacific Northwest: San Francisco to Seattle (May 2007),
- Niagara Falls and Toronto (April 2007),
- Hawaii: Big Island and Oahu (March 2007),
- Qatar (January 2007),
- London (December 2006),
- Hawaii: Maui (September 2006),
- Australia: Sydney, Cairns, Ayers Rock/Uluru, Melbourne (July 2006),
- World Cup in Germany 2006,
- Italy 2006 (Torino/Biella/Milano),
- Roadtrip through the National Parks of the West (California, Nevada, Arizona, Utah, July 2005),
- Canada: Ottawa and Montreal (July 2005),
- San Diego (June 2005),
- Duke (May 2005),
- Costa Rica (Spring Break 2005),
- "The Gates" in New York (Feb. 2005),
- Pre-Prelim Dinner (Feb. 2005) and Post-Prelim Party,
- Thanksgiving 2004 at Lake Tahoe,
- San Diego (Oct. 2004),
- The Big Game,
- Halloween 2004,
- Birthday 2004 in Berlin,
- EuroCup 2004 in Portugal,
- Napoli, Capri & Positano (June 2004)
Pictures from 2002/2003 can still be found on my old Duke webpage.
Home address:
48 Senior Ave
Berkeley, CA 94708
Office address:
525 Soda Hall
Berkeley, CA 94720
Email: slav@petrovi.de
Slav Petrov - Слав Петров, September 2008