%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/

%% Created for Irfan Essa at 2009-10-21 14:15:50 -0400

%% Saved with string encoding Unicode (UTF-8)

@inproceedings{Kim:2009:Augmenting-Aerial,
  Abstract = {We introduce methods for augmenting aerial visualizations of Earth (from tools such as Google Earth or Microsoft Virtual Earth) with dynamic information obtained from videos. Our goal is to make Augmented Earth Maps that visualize the live broadcast of dynamic sceneries within a city. We propose different approaches to analyze videos of pedestrians and cars, under differing conditions and then augment Aerial Earth Maps (AEMs) with live and dynamic information. We also analyze natural phenomenon (clouds) and project information from these to the AEMs to add the visual reality.},
  Author = {K. Kim and S. Oh and J. Lee and I. Essa},
  Booktitle = {Proceedings of IEEE/ACM International Symposium on Mixed and Augmented Reality (ISMAR)},
  Date-Added = {2009-10-21 13:46:45 -0400},
  Date-Modified = {2009-10-21 13:51:49 -0400},
  Keywords = {Animation; Computational Journalism; Image-based Rendering; Computational Photography & Video},
  Month = oct,
  Title = {Augmenting Aerial Earth Maps with Dynamic Information},
  Topic = {Animation; Computational Journalism; Image-based Rendering; Computational Photography & Video},
  Year = {2009},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/augearth/},
  Bdsk-Url-2 = {http://www.kihwan23.com/augearth/augearth_ismar09_kim.avi},
  Bdsk-Url-3 = {http://www.youtube.com/watch?v=TPk88soc2qw&feature=player_embedded}}

@inproceedings{Yin:2009:Learning-Basic,
  Abstract = {The natural language for most deaf signers in the United States is American Sign Language (ASL). ASL has internal structure like spoken languages, and ASL linguists have introduced several phonemic models. The study of ASL phonemes is not only interesting to linguists, but also useful for scalability in recognition by machines.
Since machine perception is different than human perception, this paper learns the basic units for ASL directly from data. Comparing with previous studies, our approach computes a set of data-driven units (fenemes) discriminatively from the results of segmental feature selection. The learning iterates the following two steps: first apply discriminative feature selection segmentally to the signs, and then tie the most similar temporal segments to re-train. Intuitively, the sign parts indistinguishable to machines are merged to form basic units, which we call ASL fenemes. Experiments on publicly available ASL recognition data show that the extracted data-driven fenemes are meaningful, and recognition using those fenemes achieves improved accuracy at reduced model complexity. }, Annote = {The natural language for most deaf signers in the United States is American Sign Language (ASL). ASL has internal structure like spoken languages, and ASL linguists have introduced several phonemic models. The study of ASL phonemes is not only interesting to linguists, but also useful for scalability in recognition by machines. Since machine perception is different than human perception, this paper learns the basic units for ASL directly from data. Comparing with previous studies, our approach computes a set of data-driven units (fenemes) discriminatively from the results of segmental feature selection. The learning iterates the following two steps: first apply discriminative feature selection segmentally to the signs, and then tie the most similar temporal segments to re-train. Intuitively, the sign parts indistinguishable to machines are merged to form basic units, which we call ASL fenemes. Experiments on publicly available ASL recognition data show that the extracted data-driven fenemes are meaningful, and recognition using those fenemes achieves improved accuracy at reduced model complexity}, Author = {P. Yin and T. Starner and H. Hamilton and I. Essa and J. M. 
Rehg},
  Booktitle = {Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  Date-Added = {2009-09-15 18:59:32 -0400},
  Date-Modified = {2009-10-21 13:44:33 -0400},
  Doi = {10.1109/ICASSP.2009.4960694},
  Keywords = {Activity Recognition; Audio-Visual Tracking; Face & Gesture; Multimodal Interfaces; Speech Reading},
  Month = apr,
  Pages = {4757--4760},
  Title = {Learning Basic Units in {American Sign Language} using Discriminative Segmental Feature Selection},
  Topic = {Activity Recognition; Audio-Visual Tracking; Face & Gesture; Multimodal Interfaces; Speech Reading},
  Year = {2009},
  Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?id=1583194},
  Bdsk-Url-2 = {http://dx.doi.org/10.1109/ICASSP.2009.4960694}}

@inproceedings{Flagg:2009:Human-Video,
  Abstract = {This paper describes a data-driven approach for generating photorealistic animations of human motion. Each animation sequence follows a user-choreographed path and plays continuously by seamlessly transitioning between different segments of the captured data. To produce these animations, we capitalize on the complementary characteristics of motion capture data and video. We customize our capture system to record motion capture data that are synchronized with our video source. Candidate transition points in video clips are identified using a new similarity metric based on 3-D marker trajectories and their 2-D projections into video. Once the transitions have been identified, a video-based motion graph is constructed. We further exploit hybrid motion and video data to ensure that the transitions are seamless when generating animations. Motion capture marker projections serve as control points for segmentation of layers and nonrigid transformation of regions. This allows warping and blending to generate seamless in-between frames for animation.
We show a series of choreographed animations of walks and martial arts scenes as validation of our approach.},
  Author = {M. Flagg and A. Nakazawa and Q. Zhang and S. B. Kang and Y. K. Ryu and I. Essa and J. M. Rehg},
  Booktitle = {Proceedings of the ACM Symposium on Interactive 3D Graphics and Games 2009 (I3D '09)},
  Date-Added = {2009-09-15 18:56:15 -0400},
  Date-Modified = {2009-10-21 13:44:54 -0400},
  Doi = {10.1145/1507149.1507182},
  Keywords = {Animation; Motion Capture; Computer Graphics; Computer Vision; Computational Photography & Video},
  Month = mar,
  Pages = {199--206},
  Pdf = {http://www.cc.gatech.edu/cpl/projects/humanvideotextures/HVT.pdf},
  Title = {Human Video Textures},
  Topic = {Animation; Motion Capture; Computer Graphics; Computer Vision; Computational Photography & Video},
  Video = {http://www.cc.gatech.edu/cpl/projects/humanvideotextures/hvt-i3d.avi},
  Website = {http://www.cc.gatech.edu/cpl/projects/humanvideotextures/index.html},
  Year = {2009},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/humanvideotextures/HVT.pdf},
  Bdsk-Url-2 = {http://www.cc.gatech.edu/cpl/projects/humanvideotextures/hvt-i3d.avi},
  Bdsk-Url-3 = {http://doi.acm.org/10.1145/1507149.1507182}}

@inproceedings{Diakopoulos:2009:Videolyzer:-Quality,
  Abstract = {Tools to aid people in making sense of the information quality of online informational video are essential for media consumers seeking to be well informed. Our application, Videolyzer, addresses the information quality problem in video by allowing politically motivated bloggers or journalists to analyze, collect, and share criticisms of the information quality of online political videos. Our interface innovates by providing a fine-grained and tightly coupled interaction paradigm between the timeline, the time-synced transcript, and annotations. We also incorporate automatic textual and video content analysis to suggest areas of interest for further assessment by a person.
We present an evaluation of Videolyzer looking at the user experience, usefulness, and behavior around the novel features of the UI as well as report on the collaborative dynamic of the discourse generated with the tool.},
  Author = {N. Diakopoulos and S. Goldenberg and I. Essa},
  Booktitle = {Proceedings of ACM Conference on Human Factors in Computing Systems (CHI)},
  Date-Added = {2009-09-15 18:53:51 -0400},
  Date-Modified = {2009-10-21 13:45:07 -0400},
  Doi = {10.1145/1518701.1518824},
  Keywords = {Computational Journalism; Computational Photography & Video; Authoring; Peer Production},
  Month = apr,
  Pages = {799--808},
  Pdf = {http://www.deakondesign.com/Documents/paper0553-diakopoulos_dl.pdf},
  Title = {Videolyzer: Quality Analysis of Online Informational Video for Bloggers and Journalists},
  Topic = {Computational Journalism; Computational Photography & Video; Authoring; Peer Production},
  Website = {http://www.videolyzer.com},
  Year = {2009},
  Bdsk-Url-1 = {http://www.deakondesign.com/Documents/paper0553-diakopoulos_dl.pdf},
  Bdsk-Url-2 = {http://www.deakondesign.com/videos/videolyzer_chi_video.php},
  Bdsk-Url-3 = {http://www.videolyzer.com/},
  Bdsk-Url-4 = {http://doi.acm.org/10.1145/1518701.1518824}}

@article{Kwatra:2009:Fluid-Simulation,
  Abstract = {We present an algorithm for creating realistic animations of characters that are swimming through fluids. Our approach combines dynamic simulation with data-driven kinematic motions (motion capture data) to produce realistic animation in a fluid. The interaction of the articulated body with the fluid is performed by incorporating joint constraints with rigid animation and by extending a solid/fluid coupling method to handle articulated chains. Our solver takes as input the current state of the simulation and calculates the angular and linear accelerations of the connected bodies needed to match a particular motion sequence for the articulated body.
These accelerations are used to estimate the forces and torques that are then applied to each joint. Based on this approach, we demonstrate simulated swimming results for a variety of different strokes, including crawl, backstroke, breaststroke and butterfly. The ability to have articulated bodies interact with fluids also allows us to generate simulations of simple water creatures that are driven by simple controllers.},
  Author = {N. Kwatra and C. Wojtan and M. Carlson and I. Essa and P. Mucha and Greg Turk},
  Date-Added = {2009-09-15 18:49:26 -0400},
  Date-Modified = {2009-10-21 13:43:35 -0400},
  Doi = {10.1109/TVCG.2009.66},
  Journal = {IEEE Transactions on Visualization and Computer Graphics},
  Keywords = {Animation; Motion Capture; Computer Graphics},
  Month = jun,
  Title = {Fluid Simulation with Articulated Bodies},
  Topic = {Animation; Motion Capture; Computer Graphics},
  Year = {2009},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/swimmer/},
  Bdsk-Url-2 = {http://www.cc.gatech.edu/cpl/projects/swimmer/paper/MF.pdf},
  Bdsk-Url-3 = {http://doi.ieeecomputersociety.org/10.1109/TVCG.2009.66}}

@article{Hamid:2009:A-Novel-Sequence,
  Abstract = {Formalizing computational models for everyday human activities remains an open challenge. Many previous approaches towards this end assume prior knowledge about the structure of activities, using which explicitly defined models are learned in a completely supervised manner. For a majority of everyday environments however, the structure of the in situ activities is generally not known a priori. In this paper we investigate knowledge representations and manipulation techniques that facilitate learning of human activities in a minimally supervised manner.
The key contribution of this work is the idea that global structural information of human activities can be encoded using a subset of their local event subsequences, and that this encoding is sufficient for activity-class discovery and classification. In particular, we investigate modeling activity sequences in terms of their constituent subsequences that we call event n-grams. Exploiting this representation, we propose a computational framework to automatically discover the various activity-classes taking place in an environment. We model these activity-classes as maximally similar activity-cliques in a completely connected graph of activities, and describe how to discover them efficiently. Moreover, we propose methods for finding characterizations of these discovered classes from a holistic as well as a by-parts perspective. Using such characterizations, we present a method to classify a new activity to one of the discovered activity-classes, and to automatically detect whether it is anomalous with respect to the general characteristics of its membership class. Our results show the efficacy of our approach in a variety of everyday environments.},
  Author = {R. Hamid and S. Maddi and A. Johnson and A. Bobick and I. Essa and C. Isbell},
  Date-Added = {2009-09-15 18:46:12 -0400},
  Date-Modified = {2009-10-21 09:05:39 -0400},
  Journal = {Artificial Intelligence Journal},
  Keywords = {AI; Aware Home; Activity Recognition; Machine Learning; Computer Vision},
  Pdf = {http://www.raffayhamid.com/hamid_aij_09.pdf},
  Title = {A Novel Sequence Representation for Unsupervised Analysis of Human Activities},
  Topic = {AI; Aware Home; Activity Recognition; Machine Learning; Computer Vision},
  Year = {2009},
  Bdsk-Url-1 = {http://www.raffayhamid.com/hamid_aij_09.pdf}}

@inproceedings{Grundmann:2008:3D-Shape-Context,
  Abstract = {We propose the use of 3D (2D+time) Shape Context to recognize the spatial and temporal details inherent in human actions.
We represent an action in a video sequence by a 3D point cloud extracted by sampling 2D silhouettes over time. A non-uniform sampling method is introduced that gives preference to fast moving body parts using a Euclidean 3D Distance Transform. Actions are then classified by matching the extracted point clouds. Our proposed approach is based on a global matching and does not require specific training to learn the model. We test the approach thoroughly on two publicly available datasets and compare to several state-of-the-art methods. The achieved classification accuracy is on par with or superior to the best results reported to date.},
  Annote = {Acceptance Rate: 18\%},
  Author = {M. Grundmann and F. Meier and I. Essa},
  Booktitle = {Proceedings of International Conference on Pattern Recognition (ICPR)},
  Date-Added = {2008-08-22 11:39:08 -0400},
  Date-Modified = {2009-10-21 09:21:01 -0400},
  Doi = {10.1109/ICPR.2008.4761435},
  Keywords = {Activity Recognition; Computer Vision},
  Pages = {1--4},
  Title = {{3D} Shape Context and Distance Transform for Action Recognition},
  Topic = {Activity Recognition; Computer Vision},
  Year = {2008},
  Bdsk-Url-1 = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4761435},
  Bdsk-Url-2 = {http://dx.doi.org/10.1109/ICPR.2008.4761435}}

@inproceedings{Kim:2008:Localization-and-3D-Reconstruction,
  Abstract = {Using off-the-shelf Global Positioning System (GPS) units, we reconstruct buildings in 3D by exploiting the reduction in signal to noise ratio (SNR) that occurs when the buildings obstruct the line-of-sight between the moving units and the orbiting satellites. We measure the size and height of skyscrapers as well as automatically constructing a density map representing the location of multiple buildings in an urban landscape.},
  Author = {K. Kim and J. Summet and T. Starner and D. Ashbrook and M. Kapade and I.
Essa},
  Booktitle = {Proceedings of IEEE International Symposium on Wearable Computers (ISWC)},
  Date-Added = {2008-08-21 13:12:44 -0400},
  Date-Modified = {2009-10-19 17:18:47 -0400},
  Doi = {10.1109/ISWC.2008.4911576},
  Keywords = {Peer Production; Computational Journalism; Mobile Computing},
  Location = {Pittsburgh, PA, USA},
  Pages = {11--14},
  Pdf = {http://www.cc.gatech.edu/research/reports/GT-IC-08-06},
  Publisher = {IEEE Computer Society},
  Title = {Localization and {3D} Reconstruction of Urban Scenes Using {GPS}},
  Topic = {Peer Production; Computational Journalism; Mobile Computing},
  Video = {http://www.youtube.com/watch?v=eYMfaEaGeME},
  Year = {2008},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/ISWC.2008.4911576}}

@inproceedings{Diakopoulos:2008:An-Annotation-Model,
  Abstract = {Making sense of the information quality of online media including things such as the accuracy and validity of claims and the reliability of sources is essential for people to be well-informed. We are developing Videolyzer to address the challenge of information quality sense-making by allowing motivated individuals to analyze, collect, share, and respond to criticisms of the information quality of online political videos and their transcripts. In this paper specifically we present a model of how the annotation ontology and collaborative dynamics embedded in Videolyzer can enhance information quality.},
  Author = {N. Diakopoulos and I.
Essa},
  Booktitle = {Proceedings of International Conference on the Pragmatic Web 2008},
  Date-Added = {2008-08-21 10:24:49 -0400},
  Date-Modified = {2009-10-21 10:25:34 -0400},
  Doi = {10.1145/1479190.1479195},
  Keywords = {Computational Journalism; Multimedia},
  Pages = {31--34},
  Title = {An Annotation Model for Making Sense of Information Quality in Online Videos},
  Topic = {Computational Journalism; Multimedia},
  Year = {2008},
  Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?id=1479190.1479195},
  Bdsk-Url-2 = {http://doi.acm.org/10.1145/1479190.1479195}}

@inproceedings{Diakopoulos:2008:Audio-Puzzler:,
  Abstract = {We have developed an audio-based casual puzzle game which produces a time-stamped transcription of spoken audio as a by-product of play. Our evaluation of the game indicates that it is both fun and challenging. The transcripts generated using the game are more accurate than those produced using a standard automatic transcription system and the time-stamps of words are within several hundred milliseconds of ground truth.},
  Author = {N. Diakopoulos and K. Luther and I. Essa},
  Booktitle = {Proceedings of ACM International Conference on Multimedia},
  Date-Added = {2008-08-21 09:50:13 -0400},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Computational Journalism; Multimedia},
  Title = {Audio Puzzler: Piecing Together Time-Stamped Speech Transcripts with a Puzzle Game},
  Topic = {Computational Journalism; Multimedia},
  Year = {2008}}

@inproceedings{Minnen:2007:Detecting-Subdimensional,
  Abstract = {Discovering recurring patterns in time series data is a fundamental problem for temporal data mining. This paper addresses the problem of locating subdimensional motifs in real-valued, multivariate time series, which requires the simultaneous discovery of sets of recurring patterns along with the corresponding relevant dimensions.
While many approaches to motif discovery have been developed, most are restricted to categorical data, univariate time series, or multivariate data in which the temporal patterns span all of the dimensions. In this paper, we present an expected linear-time algorithm that addresses a generalization of multivariate pattern discovery in which each motif may span only a subset of the dimensions. To validate our algorithm, we discuss its theoretical properties and empirically evaluate it using several data sets including synthetic data and motion capture data collected by an on-body inertial sensor.},
  Annote = {Acceptance: 19\%},
  Author = {D. Minnen and I. Essa and C. Isbell and T. Starner},
  Booktitle = {Proceedings of IEEE International Conference on Data Mining (ICDM)},
  Date-Added = {2008-04-14 16:30:44 -0400},
  Date-Modified = {2009-10-21 14:14:49 -0400},
  Doi = {10.1109/ICDM.2007.52},
  Keywords = {Activity Recognition; Machine Learning; Data Mining},
  Month = oct,
  Pdf = {http://www.cc.gatech.edu/people/home/dminn/papers/minnen-icdm2007.pdf},
  Read = {Yes},
  Title = {Detecting Subdimensional Motifs: An Efficient Algorithm for Generalized Multivariate Pattern Discovery},
  Topic = {Activity Recognition; Machine Learning; Data Mining},
  Year = {2007},
  Bdsk-File-1 =
{YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAagAAAAAAagAAgAAC0RhdmluY2ktTUFDAAAAAAAAAAAAAAAAAAAAAMWWIj1IKwAAALxZLxNtaW5uZW4taWNkbTIwMDcucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADv7gxClGP1BERiBwcnZ3AAIABQAACSAAAAAAAAAAAAAAAAAAAAAGUGFwZXJzABAACAAAxZZojQAAABEACAAAxCl+fwAAAAEAGAC8WS8AvFktALxZKwC8WDwAu3n8ALM11wACAEBEYXZpbmNpLU1BQzpVc2VyczppcmZhbjpTeW5jOlRleHQ6QmliOlBhcGVyczptaW5uZW4taWNkbTIwMDcucGRmAA4AKAATAG0AaQBuAG4AZQBuAC0AaQBjAGQAbQAyADAAMAA3AC4AcABkAGYADwAYAAsARABhAHYAaQBuAGMAaQAtAE0AQQBDABIANFVzZXJzL2lyZmFuL1N5bmMvVGV4dC9CaWIvUGFwZXJzL21pbm5lbi1pY2RtMjAwNy5wZGYAEwABLwAAFQACAAz//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QLi4uLy4uL1N5bmMvVGV4dC9CaWIvUGFwZXJzL21pbm5lbi1pY2RtMjAwNy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKACTAJOAlMCXAJnAmsCeQKAAokCugK/AsICzwLUAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAuY=}, Bdsk-Url-1 = {http://www.cc.gatech.edu/~dminn/papers/minnen-icdm2007.pdf}} @inproceedings{Yin:2008:Discriminative-Feature, Abstract = {We address the feature selection problem for hidden Markov models (HMMs) in sequence classification. Temporal correlation in sequences often causes difficulty in applying feature selection techniques. Inspired by segmental k-means segmentation (SKS), we propose Segmentally Boosted HMMs (SBHMMs), where the stateoptimized features are constructed in a segmental and discriminative manner. The contributions are twofold. First, we introduce a novel feature selection algorithm, where the temporal dynamics are decoupled from the static learning procedure by assuming that the sequential data are piecewise independent and identically distributed. 
Second, we show that the SBHMM consistently improves traditional HMM recognition in various domains. The reduction of error compared to traditional HMMs ranges from 17\% to 70\% in American Sign Language recognition, human gait identification, lip reading, and speech recognition.},
  Address = {Las Vegas, Nevada, USA},
  Annote = {Acceptance: 48\%},
  Author = {P. Yin and I. Essa and T. Starner and J. M. Rehg},
  Booktitle = {Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  Date-Added = {2008-04-09 23:03:44 -0400},
  Date-Modified = {2009-10-21 14:12:54 -0400},
  Doi = {10.1109/ICASSP.2008.4518031},
  Keywords = {Machine Learning; Face & Gesture},
  Month = mar,
  Pdf = {http://www.cc.gatech.edu/~pyin/pdf/SBHMMICASSP08.pdf},
  Read = {Yes},
  Title = {Discriminative Feature Selection for Hidden {Markov} Models Using Segmental Boosting},
  Topic = {Machine Learning; Face & Gesture},
  Year = {2008},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/~pyin/pdf/SBHMMICASSP08.pdf},
  Bdsk-Url-2 = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4518031}}

@inproceedings{Abowd:2000:Living-laboratories:,
  Address = {New York, NY, USA},
  Author = {G. Abowd and C. Atkeson and A. Bobick and I. Essa and B. MacIntyre and E. Mynatt and T. Starner},
  Booktitle = {Proceedings of ACM CHI Conference on Human factors in Computing Systems},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Isbn = {1-58113-248-4},
  Keywords = {Aware Home; Intelligent Environments},
  Location = {The Hague, The Netherlands},
  Note = {(extended abstracts)},
  Pages = {215--216},
  Publisher = {ACM Press},
  Title = {Living laboratories: the future computing environments group at the {Georgia Institute of Technology}},
  Topic = {Aware Home; Intelligent Environments},
  Url = {http://doi.acm.org/10.1145/633292.633416},
  Year = {2000},
  Bdsk-Url-1 = {http://doi.acm.org/10.1145/633292.633416}}

@inproceedings{Abowd:2002:The-Aware-Home:,
  Address = {Edmonton, Alberta, Canada},
  Author = {G.
Abowd and A. Bobick and I. Essa and E. Mynatt and W. Rogers},
  Booktitle = {Proceedings of AAAI Workshop on Automation as a Care Giver},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Aware Home; Aging-in-Place; Intelligent Environments},
  Note = {Held in conjunction with American Association of Artificial Intelligence (AAAI) Conference 2002},
  Organization = {AAAI},
  Title = {The Aware Home: Developing Technologies for Successful Aging},
  Topic = {Aware Home; Aging-in-Place; Intelligent Environments},
  Year = {2002}}

@article{Angelov:2005:Experiences-with,
  Abstract = {We explore optimization strategies and resulting performance of two stream-based video applications, video texture and color tracker, on a cluster of SMPs. The two applications are representative of a class of emerging applications, which we call stream-based applications, that are sensitive to both latency of individual results and overall throughput. Such applications require non-trivial parallelization techniques in order to improve both latency and throughput, given that the stream data emanates from a limited set of sources (exactly one in the two applications studied) and that the distribution of the data cannot be done a priori. We suggest techniques that address in a coordinated fashion the problems of data distribution and work partitioning. We believe the two problems are related and need to be addressed together. We have parallelized two applications using the Stampede cluster programming system that provides abstractions for implementing time- and throughput-sensitive applications elegantly and efficiently. For the Video Textures application we show that we can achieve a speedup of 24.26 on a 112 processor cluster. For the Color Tracker application, where latency is more crucial, we identify the extent of data parallelism that ensures that the slowest member of the pipeline is no longer the bottleneck for achieving a decent frame rate.},
  Author = {Y. Angelov and U.
Ramachandran and K. Mackenzie and J. M. Rehg and I. Essa},
  Bibsource = {DBLP, http://dblp.uni-trier.de},
  Date-Modified = {2008-09-03 16:31:56 -0400},
  Journal = {Journal of Parallel and Distributed Computing},
  Keywords = {Computational Photography & Video; Systems},
  Number = {6},
  Pages = {678--691},
  Title = {Experiences with optimizing two stream-based applications for cluster execution},
  Topic = {Computational Photography & Video; Systems},
  Url = {http://dx.doi.org/10.1016/j.jpdc.2005.02.002},
  Volume = {65},
  Year = {2005},
  Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.jpdc.2005.02.002}}

@inproceedings{Basu:1996:Motion-Regularization,
  Author = {S. Basu and I. Essa and A. Pentland},
  Booktitle = {Proceedings of International Conference on Pattern Recognition (ICPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Face & Gesture; Multimodal Interfaces; Computer Vision},
  Month = oct,
  Title = {Motion Regularization for Model-based Head Tracking},
  Topic = {Face & Gesture; Multimodal Interfaces; Computer Vision},
  Year = {1996}}

@inproceedings{Brand:1995:Causal-Analysis,
  Author = {M. Brand and I. Essa},
  Booktitle = {Proceedings of AAAI Fall Symposium on Computational Models for Integrating Language and Vision},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Face & Gesture; Multimodal Interfaces; Computer Vision},
  Note = {Available from {\tt www.cc.gatech.edu/fac/irfan/pubs/}},
  Title = {Causal Analysis for Visual Gesture Understanding},
  Topic = {Face & Gesture; Multimodal Interfaces; Computer Vision},
  Year = {1995}}

@inproceedings{Brostow:2004:Novel-Skeletal,
  Abstract = {Volumetric structures are frequently used as shape descriptors for 3D data. The capture of such data is being facilitated by developments in multi-view video and range scanning, extending to subjects that are alive and moving. In this paper, we examine vision-based modeling and the related representation of moving articulated creatures using spines.
We define a spine as a branching axial structure representing the shape and topology of a 3D object's limbs, and capturing the limbs' correspondence and motion over time. Our spine concept builds on skeletal representations often used to describe the internal structure of an articulated object and the significant protrusions. The algorithms for determining both 2D and 3D skeletons generally use an objective function tuned to balance stability against the responsiveness to detail. Our representation of a spine provides for enhancements over a 3D skeleton, afforded by temporal robustness and correspondence. We also introduce a probabilistic framework that is needed to compute the spine from a sequence of surface data. We present a practical implementation that approximates the spine's joint probability function to reconstruct spines for synthetic and real subjects that move.},
  Annote = {41 oral out of 555 = 7.4 percent plus 149 posters},
  Author = {G. J. Brostow and I. Essa and D. Steedly and V. Kwatra},
  Booktitle = {Proceedings of European Conference on Computer Vision (ECCV)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.cc.gatech.edu/cpl/projects/spines/},
  Keywords = {Computer Vision; Image-based Modeling; Computer Animation; Human Motion Analysis},
  Pages = {66--78},
  Pdf = {http://www.cc.gatech.edu/cpl/projects/spines/Spines_ECCV2004.pdf},
  Title = {Novel Skeletal Representation For Articulated Creatures},
  Topic = {Computer Vision; Image-based Modeling; Computer Animation; Human Motion Analysis},
  Url = {http://www.cc.gatech.edu/cpl/projects/spines/},
  Volume = {3},
  Year = {2004},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/spines/}}

@inproceedings{Brostow:1999:Motion-Based,
  Abstract = {We present a method to decompose video sequences into layers that represent the relative depths of complex scenes. Our method combines spatial information with temporal occlusions to determine relative depths of these layers.
Spatial information is obtained through edge detection and a customized contour completion algorithm. Activity in a scene is used to extract temporal occlusion events, which are in turn, used to classify objects as occluders or occludees. The path traversed by the moving objects determines the segmentation of the scene. Several examples of decompositing and compositing of video are shown. This approach can be applied in the pre-processing of sequences for compositing or tracking purposes and to determine the approximate 3D structure of a scene.},
  Annote = {46 oral out of 575 = 8 percent plus 127 posters},
  Author = {G. J. Brostow and I. Essa},
  Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.cc.gatech.edu/cpl/projects/layering/},
  Keywords = {Computational Photography & Video; Image-based Modeling; Computer Vision},
  Pages = {8--13},
  Pdf = {ftp://ftp.cc.gatech.edu/pub/gvu/tr/1999/99-31.pdf},
  Publisher = {IEEE Computer Society},
  Title = {Motion Based Decompositing of Video},
  Topic = {Computational Photography & Video; Image-based Modeling; Computer Vision},
  Volume = {1},
  Year = {1999},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/layering/}}

@inproceedings{Brostow:2001:Image-Based-Motion,
  Abstract = {Stop motion animation is a well-established technique where still pictures of static scenes are taken and then played at film speeds to show motion. A major limitation of this method appears when fast motions are desired; most motion appears to have sharp edges and there is no visible motion blur. Appearance of motion blur is a strong perceptual cue, which is automatically present in live-action films, and synthetically generated in animated sequences. In this paper, we present an approach for automatically simulating motion blur. Ours is wholly a post-process, and uses image sequences, both stop motion or raw video, as input.
First we track the frame-to-frame motion of the objects within the image plane. We then integrate the scene's appearance as it changed over a period of time. This period of time corresponds to shutter speed in live-action filming, and gives us interactive control over the extent of the induced blur. We demonstrate a simple implementation of our approach as it applies to footage of different motions and to scenes of varying complexity. Our photorealistic renderings of these input sequences approximate the effect of capturing moving objects on film that is exposed for finite periods of time.},
  Acceptance = {65 out of 300 = 22 percent},
  Author = {G. J. Brostow and I. Essa},
  Booktitle = {ACM SIGGRAPH Proceedings of Annual Conference on Computer graphics and interactive techniques},
  Date-Modified = {2008-09-03 16:30:32 -0400},
  Editor = {Eugene Fiume},
  Html = {http://www.cc.gatech.edu/cpl/projects/blur},
  Keywords = {Computational Photography & Video; Image-based Rendering; Computer Animation},
  Note = {Appears in ACM Transactions on Graphics (TOG)},
  Organization = {ACM},
  Pages = {561--566},
  Pdf = {http://www-static.cc.gatech.edu/gvu/perception/projects/blur/MotionBlur_BrostowEssa_SIGGRAPH01.pdf},
  Publisher = {ACM Press / ACM SIGGRAPH},
  Series = {Computer Graphics Proceedings, Annual Conference Series},
  Title = {Image-Based Motion Blur for Stop Motion Animation},
  Topic = {Computational Photography & Video; Image-based Rendering; Computer Animation},
  Url = {http://www.cc.gatech.edu/cpl/projects/blur/},
  Year = {2001},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/blur/}}

@inproceedings{Choi:2006:Element-Free-Elastic,
  Abstract = {We present a new method of fitting an element-free volumetric model to a sequence of deforming surfaces of a moving object. Given a sequence of visual hulls, we iteratively fit an element-free elastic model to the visual hull in order to extract the optimal pose of the captured volume.
The fitting of the volumetric model is achieved by minimizing a combination of elastic potential energy, a surface distance measure, and a self-intersection penalty for each frame. A unique aspect of our work is that the model is mesh free - since the model is represented as a point cloud, it is easy to construct, manipulate and update the model as needed. Additionally, linear elasticity with rotation compensation makes it possible to handle local deformations and large rotations of body parts much more efficiently than other volume fitting approaches. Our experimental results for volume fitting and capture in a multi-view camera setting demonstrate the robustness of element-free elastic models against noise and self-occlusions.}, Author = {J. Choi and A. Szymczak and G. Turk and I. Essa}, Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://doi.ieeecomputersociety.org/10.1109/CVPR.2006.110}, Keywords = {Computer Vision; Image-based Modeling}, Pages = {2245--2252}, Pdf = {http://www-static.cc.gatech.edu/gvu/perception//projects/VolumeFitting/cvpr06_efem.pdf}, Title = {Element-Free Elastic Models for Volume Fitting and Capture}, Topic = {Computer Vision; Image-based Modeling}, Volume = {2}, Year = {2006}, Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/CVPR.2006.110}} @inproceedings{Covington:2004:Parameterized-Authentication., Author = {M. J. Covington and M. Ahamad and I. Essa and H.
Venkateswaran}, Booktitle = {Proceedings of European Symposium on Research in Computer Security (ESORICS)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computer Security; Aware Home}, Note = {(9th Annual Meeting, Sophia Antipolis, France)}, Pages = {276--292}, Title = {Parameterized Authentication.}, Topic = {Computer Security; Aware Home}, Url = {http://dblp.uni-trier.de/db/conf/esorics/esorics2004.html#CovingtonAEV04}, Year = {2004}, Bdsk-Url-1 = {http://dblp.uni-trier.de/db/conf/esorics/esorics2004.html#CovingtonAEV04}} @inproceedings{Darrell:1994:Correlation-and-Interpolation, Abstract = {We describe a framework for real-time tracking of facial expressions that uses neurally-inspired correlation and interpolation methods. A distributed view-based representation is used to characterize facial state, and is computed using a replicated correlation network. The ensemble response of the set of view correlation scores is input to a network based interpolation method, which maps perceptual state to motor control states for a simulated 3-D face model. Activation levels of the motor state correspond to muscle activations in an anatomically derived model. By integrating fast and robust 2-D processing with 3-D models, we obtain a system that is able to quickly track and interpret complex facial motions in real-time.}, Author = {T. Darrell and I. Essa and A. Pentland}, Booktitle = {Proceedings of Conference on Advances in Neural Information Processing Systems (NIPS)}, Citeseerurl = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.3483}, Date-Modified = {2008-09-03 16:26:03 -0400}, Editor = {G. Tesauro and D. S. Touretzky and T. K. 
Leen}, Keywords = {Computer Vision; Machine Learning; Face & Gesture; Multimodal Interfaces}, Publisher = {MIT Press}, Title = {Correlation and Interpolation Networks for Real-Time Expression Analysis/Synthesis}, Topic = {Computer Vision; Machine Learning; Face & Gesture; Multimodal Interfaces}, Volume = {7}, Year = {1994}, Bdsk-Url-1 = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.3483}, Bdsk-Url-2 = {http://books.nips.cc/papers/files/nips07/0909.pdf}, Bdsk-Url-3 = {http://books.nips.cc/papers/files/nips07/0909.djvu}} @inproceedings{Darrell:1995:Attention-driven-Expression, Address = {Zurich, Switzerland}, Author = {T. Darrell and I. Essa and A. Pentland}, Booktitle = {Proceedings of International Workshop on Automatic Face and Gesture Recognition (FG)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Face & Gesture; Computer Vision; Vision for HCI}, Pages = {135--140}, Publisher = {Editor, M. Bichsel}, Title = {Attention-driven Expression and Gesture Analysis in an Interactive Environment}, Topic = {Face & Gesture; Computer Vision; Vision for HCI}, Year = {1995}} @article{Darrell:1996:Task-specific-Gesture, Abstract = {Hand and face gestures are modeled using an appearance-based approach in which patterns are represented as a vector of similarity scores to a set of view models defined in space and time. These view models are learned from examples using unsupervised clustering techniques. A supervised learning paradigm is then used to interpolate view scores into a task-dependent coordinate system appropriate for recognition and control tasks. We apply this analysis to the problem of context-specific gesture interpolation and recognition, and demonstrate real-time systems which perform these tasks}, Author = {T. Darrell and I. Essa and A.
Pentland}, Date-Modified = {2009-10-19 17:47:18 -0400}, Doi = {http://dx.doi.org/10.1109/34.546259}, Ieee = {http://ieeexplore.ieee.org/iel1/34/11961/00546259.pdf?tp=&arnumber=546259&isnumber=11961}, Journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)}, Keywords = {Computer Vision; Face & Gesture; Vision for HCI; Machine Learning}, Number = {12}, Title = {Task-specific Gesture Modeling using Interpolated Views}, Topic = {Computer Vision; Face & Gesture; Vision for HCI; Machine Learning}, Volume = {18}, Year = {1996}, Bdsk-Url-1 = {http://dx.doi.org/10.1109/34.546259}} @inproceedings{Diakopoulos:2005:Mediating-Photo, Abstract = {The medium of collage supports the visualization of meaningful event summaries using photographs. It can however be rather tedious to author a collage from a large collection of photographs. In this work we present an approach that supports efficient construction of a collage by assisting the user with an automatic layout procedure that can be controlled at a high level. Our layout method utilizes a pre-designed template which consists of cells for photos and annotations applied to these cells. The layout is then filled by matching the metadata of photos to the annotations in the cells using an optimization algorithm. The user exercises flexibility in the authoring process by (a) maintaining high-level control through the types of constraints applied and (b) leveraging visual emphases supported by the layout algorithm. The user can of course provide fine-grained control of the final collage through direct manipulation. Off-loading the tedium of collage construction to a user controlled yet automated process clears the way for rapidly generating different views of the same album and could also support the increased sharing of digital photos in the form of compact collages. }, Address = {Seattle}, Anotate = {19 percent}, Author = {N. Diakopoulos and I. 
Essa}, Booktitle = {Proceedings of ACM Symposium on User Interface Software and Technology (UIST)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computational Photography & Video; HCI}, Month = {October}, Pages = {183--186}, Title = {Mediating Photo Collage Authoring}, Topic = {Computational Photography & Video; HCI}, Url = {http://portal.acm.org/citation.cfm?id=1095034.1095065}, Year = {2005}, Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?id=1095034.1095065}} @inproceedings{Diakopoulos:2005:Supporting-Personal, Abstract = {Authoring media is a difficult task which is confounded by a huge range of possibilities for expressing any given message. In order to mitigate the task we argue for a tighter coupling between computer and author with human agency maintained through the use of suggestive user-interfaces. The general authoring task is described as choosing a message and editing the selection and arrangement of media elements. For personal media in particular, story-telling and experience sharing are highly important and influence the general task profile. We discuss the context of personal media authoring and the degree of automated support possible in light of complexity and annotation. }, Address = {Singapore}, Author = {N. Diakopoulos and I.
Essa}, Booktitle = {Proceedings of the Workshop on Multimedia for Human Communication (MHC) at ACM Multimedia}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Multimedia; Computational Journalism}, Month = {November}, Pages = {21--23}, Title = {Supporting Personal Media Authoring}, Topic = {Multimedia; Computational Journalism}, Url = {http://portal.acm.org/citation.cfm?id=1099376.1099383}, Year = {2005}, Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?id=1099376.1099383}} @inproceedings{Diakopoulos:2006:Videotater:-An-Approach, Abstract = {The continuous growth of media databases necessitates development of novel visualization and interaction techniques to support management of these collections. We present Videotater, an experimental tool for a Tablet PC that supports the efficient and intuitive navigation, selection, segmentation, and tagging of video. Our veridical representation immediately signals to the user where appropriate segment boundaries should be placed and allows for rapid review and refinement of manually or automatically generated segments. Finally, we explore a distribution of modalities in the interface by using multiple timeline representations, pressure sensing, and a tag painting/erasing metaphor with the pen.}, Author = {N. Diakopoulos and I. Essa}, Booktitle = {Proceedings of ACM Symposium on User Interface Software and Technology (UIST)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computational Photography & Video; HCI; Multimedia}, Month = {October}, Title = {Videotater: An Approach for Pen-Based Digital Video Segmentation and Tagging}, Topic = {Computational Photography & Video; HCI; Multimedia}, Year = {2006}, Bdsk-Url-1 = {http://www.deakondesign.com/Documents/tn151-diakopoulos.pdf}} @inproceedings{Diakopoulos:2004:Content-Based, Author = {N. Diakopoulos and I. Essa and R.
Jain}, Booktitle = {Proceedings of Conference on Content-Based Image and Video Retrieval (CIVR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computational Photography & Video; Multimedia; Peer Production}, Pages = {299--307}, Title = {Content Based Image Synthesis.}, Topic = {Computational Photography & Video; Multimedia; Peer Production}, Url = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3115&spage=299}, Year = {2004}, Bdsk-Url-1 = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3115&spage=299}} @inproceedings{Diakopoulos:2007:The-Evolution-of-Authorship, Abstract = {Authorship entails the constrained selection or generation of media and the organization and layout of that media in a larger structure. But authorship is more than just selection and organization; it is a complex construct incorporating concepts of originality, authority, intertextuality, and attribution. In this paper we explore these concepts and ask how they are changing in light of modes of collaborative authorship in remix culture. We present a qualitative case study of an online video remixing site, illustrating how the constraints of that environment are impacting authorial constructs. We discuss users' self-conceptions as authors, and how values related to authorship are reflected to users through the interface and design of the site's tools. We also present some implications for the design of online communities for collaborative media creation and remixing.}, Address = {Manchester, UK}, Author = {N. Diakopoulos and K. Luther and Y. Medynskiy and I.
Essa}, Booktitle = {Proceedings of ACM Conference on Hypertext and Hypermedia}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://doi.acm.org/10.1145/1286240.1286272}, Keywords = {Computational Journalism; Authoring; Multimedia}, Month = {September}, Publisher = {ACM Press}, Read = {Yes}, Title = {The Evolution of Authorship in a Remix Society}, Topic = {Computational Journalism; Authoring; Multimedia}, Year = {2007}, Bdsk-Url-1 = {http://www.deakondesign.com/wp-content/uploads/2007/06/hts5-diakopoulos.pdf}, Bdsk-Url-2 = {http://doi.acm.org/10.1145/1286240.1286272}, Bdsk-Url-3 = {http://doi.acm.org/10.1145/1286240.1286272}} @mastersthesis{Essa:1990:Contact-Detection, Author = {I. Essa}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computer Animation; Modeling}, Month = {June}, School = {Massachusetts Institute of Technology}, Title = {Contact Detection, Collision Forces and Friction for Physically Based Virtual World Modeling}, Topic = {Computer Animation; Modeling}, Year = {1990}} @phdthesis{Essa:1994:Analysis-Interpretation, Address = {MIT Media Laboratory, Cambridge, MA 02139, USA}, Author = {I. Essa}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computer Vision; Face & Gesture; Multimodal Interfaces; Vision for HCI; Animation}, School = {Massachusetts Institute of Technology}, Title = {Analysis, Interpretation, and Synthesis of Facial Expressions}, Topic = {Computer Vision; Face & Gesture; Multimodal Interfaces; Vision for HCI; Animation}, Year = {1994}, Bdsk-Url-1 = {http://dspace.mit.edu/handle/1721.1/29086}} @article{Essa:1999:Computers-Seeing, Author = {I.
Essa}, Date-Modified = {2008-09-03 16:26:03 -0400}, Journal = {AI Magazine}, Keywords = {Computer Vision; Computational Perception; HCI; AI; Aware Home}, Month = {Summer}, Number = {1}, Pages = {69--82}, Title = {Computers Seeing People}, Topic = {Computer Vision; Computational Perception; HCI; AI; Aware Home}, Volume = {20}, Year = {1999}} @article{Essa:2000:Ubiquitous-sensing, Abstract = {As computing technology continues to become increasingly pervasive and ubiquitous, we envision development of environments that can sense what we are doing and support our daily activities. In this article we outline our efforts toward building such environments, and discuss the importance of a sensing and signal-understanding infrastructure that leads to awareness of what is happening in an environment and how it can best be supported. Such an infrastructure supports both high- and low-end data transmission and processing, while allowing for detailed interpretation, modeling, and recognition from sensed information. We are currently prototyping several aware environments to aid in the development and study of such sensing and computation in real-world settings. }, Author = {I. Essa}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://www.comsoc.org/pci/private/2000/oct/essa.html}, Journal = {IEEE Personal Communications}, Keywords = {Aware Home; Intelligent Environments}, Month = {October}, Note = {Special Issue on Networking the Physical World.}, Pdf = {http://www.comsoc.org/pci/private/2000/oct/pdf/essa.pdf}, Title = {Ubiquitous sensing for smart and aware environments}, Topic = {Aware Home; Intelligent Environments}, Year = {2000}} @inproceedings{Essa:1997:Computational-Perception, Author = {I. Essa and G. Abowd and C. 
Atkeson}, Booktitle = {Proceedings of Workshop on Perceptual User Interfaces (PUI)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computational Perception; HCI; Aware Home}, Title = {Computational Perception in Future Computing Environments}, Topic = {Computational Perception; HCI; Aware Home}, Year = {1997}, Bdsk-Url-1 = {http://www.cc.gatech.edu/fce/pubs/pui97-fce.html}} @inproceedings{Essa:2002:Building-and-Aware, Address = {Kyoto, Japan}, Author = {I. Essa and G. Abowd and A. Bobick and E. Mynatt and W. Rogers}, Booktitle = {Proceedings of First International Workshop on Man-Machine Symbiosis}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Aware Home; Intelligent Environments; HCI; Aging-in-Place}, Title = {Building an Aware Home: Technologies for the way we may live}, Topic = {Aware Home; Intelligent Environments; HCI; Aging-in-Place}, Year = {2002}} @inproceedings{Essa:1996:Modeling-Tracking, Author = {I. Essa and S. Basu and T. Darrell and A. Pentland}, Booktitle = {Proceedings of Computer Animation Conference}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1109/CA.1996.540489}, Keywords = {Animation; Image-based Modeling; Face & Gesture; Computer Vision}, Month = {June}, Pages = {68--79}, Pdf = {http://ieeexplore.ieee.org/iel3/3843/11211/00540489.pdf?tp=&arnumber=540489&isnumber=11211}, Publisher = {IEEE Computer Society Press}, Title = {Modeling, Tracking and Interactive Animation of Faces and Heads using Input from Video}, Topic = {Animation; Image-based Modeling; Face & Gesture; Computer Vision}, Year = {1996}, Bdsk-Url-1 = {http://dx.doi.org/10.1109/CA.1996.540489}} @inproceedings{Essa:2000:A-Course-on-Digital, Author = {I. Essa and G. J.
Brostow}, Booktitle = {Proceedings of IEEE CS Workshop on Undergraduate Education and Image Computation}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://www.cc.gatech.edu/dvfx}, Keywords = {DVFX; Animation; Computer Graphics; Computer Vision; Computational Photography & Video}, Month = {June}, Pdf = {http://www.cc.gatech.edu/gvu/people/gabriel.brostow/research/CVPR2000/WorkshopDVFX_CVPR2001.pdf}, Title = {A Course on Digital Video Special Effects}, Topic = {DVFX; Animation; Computer Graphics; Computer Vision; Computational Photography & Video}, Year = {2000}} @inproceedings{Essa:1994:Tracking-Facial, Abstract = {We describe a computer system that allows real-time tracking of facial expressions. Sparse, fast visual measurements using 2D templates are used to observe the face of a subject. Rather than track features on the face, the distributed response of a set of templates is used to characterize a given facial region. These measurements are coupled via a linear interpolation method to states in a physically-based model of facial animation, which includes both skin and muscle dynamics. By integrating real-time 2D image-processing with 3D models we obtain a system that is able to quickly track and interpret complex facial motions}, Author = {I. Essa and T. Darrell and A. Pentland}, Booktitle = {Proceedings of IEEE Workshop on Motion of Nonrigid and Articulated Objects}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1109/MNRAO.1994.346257}, Keywords = {Face & Gesture; Multimodal Interfaces; Computer Vision}, Pages = {36--42}, Pdf = {http://ieeexplore.ieee.org/iel2/1000/8038/00346257.pdf?tp=&arnumber=346257&isnumber=8038}, Publisher = {IEEE Computer Society}, Title = {Tracking Facial Motion}, Topic = {Face & Gesture; Multimodal Interfaces; Computer Vision}, Year = {1994}, Bdsk-Url-1 = {http://dx.doi.org/10.1109/MNRAO.1994.346257}} @techreport{Essa:1991:Estimated-Physics:, Author = {I. Essa and A.
Pentland}, Date-Modified = {2008-09-03 16:26:03 -0400}, Institution = {M.I.T. Media Laboratory, Vision and Modeling Group}, Keywords = {Computer Animation; Modeling}, Number = {182}, Title = {Estimated Physics: Dynamic Simulation in a Resource Limited Environment}, Topic = {Computer Animation; Modeling}, Year = {1991}} @inproceedings{Essa:1994:A-Vision-System, Author = {I. Essa and A. Pentland}, Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1109/CVPR.1994.323813}, Keywords = {Face & Gesture; Computer Vision; Vision for HCI}, Pages = {76--83}, Pdf = {http://ieeexplore.ieee.org/iel2/977/7716/00323813.pdf?tp=&arnumber=323813&isnumber=7716}, Publisher = {IEEE Computer Society}, Title = {A Vision System for Observing and Extracting Facial Action Parameters}, Topic = {Face & Gesture; Computer Vision; Vision for HCI}, Year = {1994}, Bdsk-Url-1 = {http://dx.doi.org/10.1109/CVPR.1994.323813}} @inproceedings{Essa:1995:Facial-Expression, Abstract = {Previous efforts at facial expression recognition have been based on the Facial Action Coding System (FACS), a representation developed in order to allow human psychologists to code expression from static facial ``mugshots.'' We develop new more accurate representations for facial expression by building a video database of facial expressions and then probabilistically characterizing the facial muscle activation associated with each expression using a detailed physical model of the skin and muscles. This produces a muscle based representation of facial motion, which is then used to recognize facial expressions in two different ways. The first method uses the physics based model directly, by recognizing expressions through comparison of estimated muscle activations. The second method uses the physics based model to generate spatio temporal motion energy templates of the whole face for each different expression. 
These simple, biologically plausible motion energy ``templates'' are then used for recognition. Both methods show substantially greater accuracy at expression recognition than has been previously achieved}, Address = {Cambridge, MA}, Author = {I. Essa and A. Pentland}, Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {10.1109/ICCV.1995.466916}, Keywords = {Computer Vision; Face & Gesture; Multimodal Interfaces; Vision for HCI}, Pages = {360--367}, Publisher = {IEEE Computer Society}, Title = {Facial Expression Recognition using a Dynamic Model and Motion Energy}, Topic = {Computer Vision; Face & Gesture; Multimodal Interfaces; Vision for HCI}, Url = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=466916}, Year = {1995}, Bdsk-Url-1 = {http://ieeexplore.ieee.org/search/srchabstract.jsp?arnumber=466916&isnumber=9796&punumber=3245&k2dockey=466916@ieeecnfs&query=%28%28essa%29%3Cin%3Eau+%29&pos=18&access=no}, Bdsk-Url-2 = {http://ieeexplore.ieee.org/iel2/3245/9796/00466916.pdf?tp=&isnumber=9796&arnumber=466916&punumber=3245}, Bdsk-Url-3 = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=466916}, Bdsk-Url-4 = {http://dx.doi.org/10.1109/ICCV.1995.466916%20}, Bdsk-Url-5 = {http://dx.doi.org/10.1109/ICCV.1995.466916}} @article{Essa:1997:Coding-Analysis, Abstract = {We describe a computer vision system for observing facial motion by using an optimal estimation optical flow method coupled with geometric, physical and motion-based dynamic models describing the facial structure. Our method produces a reliable parametric representation of the face's independent muscle action groups, as well as an accurate estimate of facial motion. Previous efforts at analysis of facial expression have been based on the facial action coding system (FACS), a representation developed in order to allow human psychologists to code expression from static pictures. 
To avoid use of this heuristic coding scheme, we have used our computer vision system to probabilistically characterize facial motion and muscle activation in an experimental population, thus deriving a new, more accurate, representation of human facial expressions that we call FACS+. Finally, we show how this method can be used for coding, analysis, interpretation, and recognition of facial expressions}, Author = {I. Essa and A. Pentland}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {10.1109/34.598232}, Journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)}, Keywords = {Computer Vision; Face & Gesture; Animation; Vision for HCI}, Number = {7}, Pages = {757--763}, Title = {Coding, Analysis, Interpretation, and Recognition of Facial Expressions}, Topic = {Computer Vision; Face & Gesture; Animation; Vision for HCI}, Url = {http://academics.irfanessa.com/1997/07/14/paper-ieee-pami-1997-coding-analysis-interpretation-and-recognition-of-facial-expressions/}, Volume = {19}, Year = {1997}, Bdsk-Url-1 = {http://academics.irfanessa.com/1997/07/14/paper-ieee-pami-1997-coding-analysis-interpretation-and-recognition-of-facial-expressions/}, Bdsk-Url-2 = {http://csdl2.computer.org/persagen/DLAbsToc.jsp?resourcePath=/dl/trans/tp/&toc=comp/trans/tp/1997/07/i7toc.xml&DOI=10.1109/34.598232}, Bdsk-Url-3 = {http://doi.ieeecomputersociety.org/10.1109/34.598232}, Bdsk-Url-4 = {http://dx.doi.org/10.1109/34.598232}} @inbook{Essa:1997:Motion-and-Model-based, Author = {I. Essa and A. Pentland}, Booktitle = {Motion Based Recognition}, Date-Modified = {2008-09-03 16:26:35 -0400}, Editor = {M. Shah and R.
Jain}, Keywords = {Computer Vision; Face & Gesture; Animation}, Publisher = {Kluwer Academic Publishers}, Series = {Computational Imaging and Vision Series}, Title = {Motion and Model-based Recognition and Interpretation of Facial Expressions}, Topic = {Computer Vision; Face & Gesture; Animation}, Year = {1997}, Bdsk-Url-1 = {http://www.springer.com/computer/computer+imaging/book/978-0-7923-4618-0}, Bdsk-Url-2 = {http://books.google.com/books?id=OXGoTjniP-MC&dq=motion+based+recognition&pg=PP1&ots=YVpeCAU7mj&sig=phFCIa_rGDZfgrFbfBh3_OkUh-M&hl=en&sa=X&oi=book_result&resnum=1&ct=result}} @article{Essa:1992:A-Unified-Approach, Abstract = {We present a unified approach for geometric and physical modeling using implicit functions, for application to graphics and animation. This method extends previously proposed techniques, and allows the standard finite element method to be directly combined with geometric modeling, resulting in quick calculation of an object's mass and stiffness matrices, and its vibration modes and frequencies. Because the approach is based on an implicit function representation, it allows very fast collision detection and characterization. Examples of complex physical and geometric modeling are presented.}, Author = {I. Essa and S. Sclaroff and A. Pentland}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {10.1111/1467-8659.1130129}, Editors = {Alistar Kilgour and Lars Kjelldahl}, Journal = {Computer Graphics Forum, The International Journal of the Eurographics Association}, Keywords = {Modeling; Computer Animation}, Month = {May}, Number = {3}, Pages = {129--138}, Title = {A Unified Approach for Physical and Geometric Modeling for Graphics and Animation}, Topic = {Modeling; Computer Animation}, Volume = {11}, Year = {1992}, Bdsk-Url-1 = {http://www.blackwell-synergy.com/doi/abs/10.1111/1467-8659.1130129?cookieSet=1&journalCode=cgf}, Bdsk-Url-2 = {http://dx.doi.org/10.1111/1467-8659.1130129}} @inbook{Essa:1993:Physically-based-Modeling, Author = {I.
Essa and S. Sclaroff and A. Pentland}, Booktitle = {Directions in Geometric Computing}, Citeseerurl = {http://citeseer.ist.psu.edu/2883.html}, Date-Modified = {2008-09-03 16:26:17 -0400}, Editor = {Ralph Martin}, Keywords = {Modeling; Computer Animation; Computer Vision}, Pages = {160--196}, Publisher = {Information Geometers, U.K.}, Read = {Yes}, Title = {Physically-based Modeling for Graphics and Vision}, Topic = {Modeling; Computer Animation; Computer Vision}, Year = {1993}, Bdsk-Url-1 = {http://citeseer.ist.psu.edu/2883.html}} @inproceedings{Gardner:1997:Prosody-Analysis, Abstract = {Speech is a complex waveform containing verbal (e.g. phoneme, syllable, and word) and nonverbal (e.g. speaker identity, emotional state, and tone) information. Both the verbal and nonverbal aspects of speech are extremely important in interpersonal communication and human-machine interaction. However, work in machine perception of speech has focused primarily on the verbal, or content-oriented, goals of speech recognition, speech compression, and speech labeling. Usage of nonverbal information has been limited to speaker identification applications. While the success of research in these areas is well documented, this success is fundamentally limited by the effect of nonverbal information on the speech waveform. The extra-linguistic aspect of speech is considered a source of variability that theoretically can be minimized with an appropriate preprocessing technique; determination of such robust techniques is however, far from trivial. It is widely believed in the speech processing community that the nonverbal component of speech contains higher-level information that provides cues for auditory scene analysis, speech understanding, and the determination of a speaker's psychological state or conversational tone. 
We believe that the identification of such nonverbal cues can improve the performance of classic speech processing tasks and will be necessary for the realization of natural, robust human-computer speech interfaces. In this paper we seek to address the problem of how to systematically analyze the nonverbal aspect of the speech waveform to determine speaker affect, specifically by analyzing the pitch contour.}, Author = {A. Gardner and I. Essa}, Booktitle = {Proceedings of Workshop on Perceptual User Interfaces (PUI)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Audio/Music Analysis; Multimodal Interfaces; Affective Computing}, Month = {October}, Note = {In Conjunction with UIST 1997}, Title = {Prosody Analysis for Speaker Affect Determination}, Topic = {Audio/Music Analysis; Multimodal Interfaces; Affective Computing}, Url = {http://www-static.cc.gatech.edu/cpl/pubs/pui.97/}, Year = {1997}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAagAAAAAAagAAgAABWlyZmFuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCRAAB/////xhHYXJkbmVyLUVzc2EtUFVJMTk5Ny5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAAIABQAAEgFjdQAAAAAAAAAAAAAAAAAGcHVpLjk3AAIAPC86Vm9sdW1lczppcmZhbjpjcGw6d3d3OnB1YnM6cHVpLjk3OkdhcmRuZXItRXNzYS1QVUkxOTk3LnBkZgAOADIAGABHAGEAcgBkAG4AZQByAC0ARQBzAHMAYQAtAFAAVQBJADEAOQA5ADcALgBwAGQAZgAPAAwABQBpAHIAZgBhAG4AEgAtL2NwbC93d3cvcHVicy9wdWkuOTcvR2FyZG5lci1Fc3NhLVBVSTE5OTcucGRmAAATAA4vVm9sdW1lcy9pcmZhbgAJADYANmNpZnMAAAEAAABjaWZzOi8vaXJmYW5AY2NzYW1iYS5jYy5nYXRlY2guZWR1L2lyZmFuAAD//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QMi4uLy4uL2NwbC93d3cvcHVicy9wdWkuOTcvR2FyZG5lci1Fc3NhLVBVSTE5OTcucGRm0hwdJCWiJSFcTlNEaWN0aW9uYXJ5EgABhqBfEA9OU0tleWVkQXJjaGl2ZXIACAARABYAHwAoADIANQA6ADwARQBLAFIAXQBlAGwAbwBxAHMAdgB4AHoAfACGAJMAmACgAkwC
TgJTAlwCZwJrAnkCgAKJAr4CwwLGAtMC2AAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAALq}, Bdsk-Url-1 = {http://www-static.cc.gatech.edu/~irfan/pubs/papers/Gardner-Essa-PUI-1997.pdf}, Bdsk-Url-2 = {http://www-static.cc.gatech.edu/cpl/pubs/pui.97/}} @inproceedings{Hamid:2003:ARGMode-Activity, Abstract = {This paper presents a new framework for tracking and, recognizing complex multi-agent activities using probabilistic tracking coupled with graphical models for recognition. We employ statistical feature based particle filter to robustly track multiple objects in cluttered environments. Both color and shape characteristics are used to differentiate and track different objects so that low level visual information can be reliably extracted for recognition of complex activities. Such extracted spatio-temporal features are then used to build temporal graphical models for characterization of these activities. We demonstrate through examples in different scenarios, the generalizability and robustness of our framework.}, Address = {Madison, WI}, Author = {R. Hamid and Y. Huang and I. Essa}, Booktitle = {Proceedings of IEEE Workshop on Event Mining, Event Detection, and Recognition in Video}, Date-Modified = {2009-10-19 16:51:43 -0400}, Html = {http://www.cc.gatech.edu/cpl/projects/ARGMode/}, Keywords = {Computer Vision; Activity Recognition}, Note = {In Conjunction with CVPR 2003}, Pages = {38--44}, Publisher = {IEEE Computer Society}, Title = {ARGMode - Activity Recognition using Graphical Models}, Topic = {Computer Vision; Activity Recognition}, Volume = {4}, Year = {2003}} @inproceedings{Hamid:2006:Unsupervised-Analysis, Abstract = {We present an unsupervised framework to discover characterizations of everyday human activities, and demonstrate how such representations can be used to extract points of interest in event-streams. 
We begin with the usage of Suffix Trees as an efficient activity-representation to analyze the global structural information of activities, using their local event statistics over the entire continuum of their temporal resolution. Exploiting this representation, we discover characterizing event-subsequences and present their usage in an ensemble-based framework for activity classification. Finally, we propose a method to automatically detect subsequences of events that are locally atypical in a structural sense. Results over extensive data-sets, collected from multiple sensor-rich environments are presented, to show the competence and scalability of the proposed framework.}, Author = {R. Hamid and S. Maddi and A. Bobick and I. Essa}, Booktitle = {Proceedings of ACM International Workshop on Video Surveillance and Sensor Networks}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; Computer Vision}, Note = {(in conjunction with ACM Multimedia 2006)}, Organization = {ACM}, Pdf = {http://www-static.cc.gatech.edu/~raffay/acm_vssn.pdf}, Title = {Unsupervised Analysis of Activity Sequences Using Event Motifs}, Topic = {Activity Recognition; Computer Vision}, Year = {2006}} @inproceedings{Hamid:2007:Structure-from, Abstract = {Models of activity structure for unconstrained environments are generally not available a priori. Recent representational approaches to this end are limited by their computational complexity, and ability to capture activity structure only up to some fixed temporal scale. In this work, we propose Suffix Trees as an activity representation to efficiently extract structure of activities by analyzing their constituent event-subsequences over multiple temporal scales. We empirically compare Suffix Trees with some of the previous approaches in terms of feature cardinality, discriminative prowess, noise sensitivity and activity-class discovery. 
Finally, exploiting properties of Suffix Trees, we present a novel perspective on anomalous subsequences of activities, and propose an algorithm to detect them in linear-time. We present comparative results over experimental data, collected from a kitchen environment to demonstrate the competence of our proposed framework.}, Annote = {Acceptance: 23.5 percent}, Author = {R. Hamid and S. Maddi and A. Bobick and I. Essa}, Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computer Vision; Activity Recognition; Machine Learning}, Publisher = {IEEE Computer Society Press}, Read = {Yes}, Title = {Structure from Statistics - Unsupervised Activity Analysis using Suffix Trees}, Topic = {Computer Vision; Activity Recognition; Machine Learning}, Year = {2007}} @inproceedings{Hamid:2005:Discovery-and-Characterization, Abstract = {We present a framework to discover and characterize different classes of everyday activities from event-streams. We begin by representing activities as bags of event n-grams. This allows us to analyze the global structural information of activities, using their local event statistics. We demonstrate how maximal cliques in an undirected edge-weighted graph of activities, can be used for activity-class discovery in an unsupervised manner. We show how modeling an activity as a variable length Markov process, can be used to discover recurrent event-motifs to characterize the discovered activity-classes. We present results over extensive data-sets, collected from multiple active environments, to show the competence and generalizability of our proposed framework.}, Address = {Edinburgh, Scotland}, Anotate = {(82 papers accepted)/(243 papers submitted) = 34 percent}, Author = {R. Hamid and S. Maddi and A. Johnson and A. Bobick and I. Essa and C. 
Isbell}, Booktitle = {Proceedings of Conference on Uncertainty in Artificial Intelligence (UAI)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://www-static.cc.gatech.edu/~raffay/local_anomaly_exp_pages/anomaly_explanation.htm}, Keywords = {Activity Recognition; Computer Vision; AI}, Month = {July}, Pages = {251--258}, Pdf = {http://www.cc.gatech.edu/~raffay/hamid_uai05.pdf}, Read = {Yes}, Title = {Discovery and Characterization of Activities from Event-Streams}, Topic = {Activity Recognition; Computer Vision; AI}, Year = {2005}} @inproceedings{Hamid:2005:Unsupervised-Activity, Abstract = {Recognizing what is happening in an environment has many potential applications, ranging from automatic surveillance systems to supporting users in ubiquitous environments. A key step to this end is to discover the kinds of similar activities that frequently occur in a particular domain. Equally important is the question of finding efficient characterizations for these different kinds of activities. We are interested in the study of activity class discovery and characterization, in the context of analyzing everyday activities. We present a novel representation of activities as bags of discrete n-grams. We then demonstrate how disjunctive activity groups can be discovered in an unsupervised manner. Finally, we lay out a framework for unsupervised discovery of predictably recurrent event motifs for activity class characterization.}, Address = {Snowbird, Utah}, Author = {R. Hamid and S. Maddi and A. Johnson and A. Bobick and I. Essa and C. 
Isbell}, Booktitle = {Proceedings of The Learning Workshop at Snowbird}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; AI; Machine Learning}, Pdf = {http://www-static.cc.gatech.edu/~raffay/snowbird.pdf}, Title = {Unsupervised Activity Discovery and Characterization From Event-Streams}, Topic = {Activity Recognition; AI; Machine Learning}, Year = {2005}} @inproceedings{Haro:2002:Learning-Video, Abstract = {We present an algorithm that approximates the output of an arbitrary video processing algorithm based on a pair of input and output exemplars. Our algorithm relies on learning the mapping between the input and output exemplars to model the processing that has taken place. We approximate the processing by observing that pixel neighborhoods similar in appearance and motion to those in the exemplar input should result in neighborhoods similar to the exemplar output. Since there are not many pixel neighborhoods in the exemplars, we use techniques from texture synthesis to generalize the output of neighborhoods not observed in the exemplars. The same algorithm is used to learn such processing as motion blur, color correction, and painting.}, Address = {Quebec, Canada}, Anotate = {65 percent}, Author = {A. Haro and I. Essa}, Booktitle = {Proceedings of International Conference on Pattern Recognition (ICPR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Editor = {R. Kasturi and D. Laurendeau and C. 
Suen}, Keywords = {Computational Photography & Video; Machine Learning; Computer Vision}, Organization = {CIPPRS and IAPR and IEEE}, Pages = {487--491}, Title = {Learning Video Processing by Example}, Topic = {Computational Photography & Video; Machine Learning; Computer Vision}, Volume = {1}, Year = {2002}, Bdsk-Url-1 = {http://ieeexplore.ieee.org/search/freesrchabstract.jsp?arnumber=1044771&isnumber=22378&punumber=8091&k2dockey=1044771@ieeecnfs&query=((essa)%3Cin%3Eau+)&pos=5&access=yes}} @inproceedings{Haro:2003:Exemplar-Based, Address = {Munich, Germany}, Anotate = {67 percent}, Author = {A. Haro and I. Essa}, Booktitle = {Proceedings of Conference on Vision, Modeling, and Visualization}, Date-Modified = {2008-09-03 16:26:03 -0400}, Editor = {T. Ertl and B. Girod and G. Greiner and H. Niemann and H.-P. Seidel and E. Steinbach and R. Westermann}, Keywords = {Computational Photography & Video; Image-based Modeling}, Organization = {Computer Graphics \& Visualization Group and the Media Technology Group of Technische Universit{\"a}t M{\"u}nchen}, Pages = {95--101}, Publisher = {Akademische Verlagsgesellschaft Aka GmbH, Berlin}, Title = {Exemplar Based Surface Texture}, Topic = {Computational Photography & Video; Image-based Modeling}, Year = {2003}} @inproceedings{Haro:2000:A-Non-Invasive-Computer, Author = {A. Haro and I. Essa and M. Flickner}, Booktitle = {Proceedings of ACM CHI Conference on Human Factors in Computing Systems}, Date-Modified = {2008-09-03 16:26:03 -0400}, Editor = {G. Szwillus and T. Turner}, Keywords = {Vision for HCI; Face & Gesture; HCI}, Note = {Short Paper in Extended Abstracts}, Organization = {ACM}, Pages = {167--168}, Publisher = {ACM Press}, Title = {A Non-Invasive Computer Vision System For Reliable Eye Tracking}, Topic = {Vision for HCI; Face & Gesture; HCI}, Year = {2000}} @inproceedings{Haro:2000:Detecting-and-Tracking, Address = {Hilton Head Island, SC, USA}, Anotate = {47 percent}, Author = {A. Haro and M. Flickner and I. 
Essa}, Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, Date-Modified = {2009-10-19 17:46:48 -0400}, Editor = {J. Ponce and J. Malik and D. Kriegman and D. Forsyth}, Keywords = {Vision for HCI; Face & Gesture; Computer Vision}, Organization = {IEEE}, Pages = {163--168}, Publisher = {IEEE}, Read = {Yes}, Title = {Detecting and Tracking Eyes By Using Their Physiological Properties, Dynamics, and Appearance}, Topic = {Vision for HCI; Face & Gesture; Computer Vision}, Volume = {1}, Year = {2000}} @inproceedings{Haro:2001:Real-time-Photo-realistic, Address = {London, England}, Anotate = {39 percent}, Author = {A. Haro and B. Guenter and I. Essa}, Booktitle = {Proceedings of Eurographics Workshop on Rendering (ESR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Editor = {S. Gortler and K. Myszkowski}, Keywords = {Image-based Rendering; Computer Graphics}, Organization = {Eurographics}, Pages = {53--62}, Title = {Real-time, Photo-realistic, Physically Based Rendering of Fine Scale Human Skin Structure}, Topic = {Image-based Rendering; Computer Graphics}, Year = {2001}} @inproceedings{Hays:2004:Image-and-video, Acm = {http://doi.acm.org/10.1145/987657.987676}, Address = {New York, NY, USA}, Author = {J. Hays and I. 
Essa}, Booktitle = {Proceedings of ACM Conference on Non-photorealistic animation and rendering (NPAR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1145/987657.987676}, Html = {http://www.cc.gatech.edu/cpl/projects/artstyling/}, Isbn = {1-58113-887-3}, Keywords = {Computational Photography & Video; Nonphotorealistic Rendering; Animation}, Location = {Annecy, France}, Pages = {113--120}, Pdf = {http://www-static.cc.gatech.edu/gvu/perception//projects/artstyling/IVBPA_final.pdf}, Publisher = {ACM Press}, Title = {Image and video based painterly animation}, Topic = {Computational Photography & Video; Nonphotorealistic Rendering; Animation}, Year = {2004}, Bdsk-Url-1 = {http://dx.doi.org/10.1145/987657.987676}} @inproceedings{Huang:2005:Tracking-Multiple, Abstract = {We present an approach for tracking varying number of objects through both temporally and spatially significant occlusions. Our method builds on the idea of object permanence to reason about occlusions. To this end, tracking is performed at both the region level and the object level. At the region level, a customized genetic algorithm is used to search for optimal region tracks. This limits the scope of object trajectories. At the object level, each object is located based on adaptive appearance models, spatial distributions and inter-occlusion relationships. The proposed architecture is capable of tracking objects even in the presence of long periods of full occlusions. We demonstrate the viability of this approach by experimenting on several videos of a user interacting with a variety of objects on a desktop.}, Address = {San Diego, CA, USA}, Anotate = {20.6 percent}, Author = {Y. Huang and I. 
Essa}, Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Visual Tracking; Activity Recognition; Computer Vision}, Month = {June}, Pages = {1051--1058}, Publisher = {IEEE Computer Society}, Title = {Tracking Multiple Objects Through Occlusions}, Topic = {Visual Tracking; Activity Recognition; Computer Vision}, Url = {http://www.cc.gatech.edu/cpl/projects/occlusion}, Year = {2005}, Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/occlusion}} @inproceedings{Kidd:1999:The-Aware-Home:, Author = {C. Kidd and R. Orr and G. Abowd and C. Atkeson and I. Essa and B. MacIntyre and E. Mynatt and T. Starner and W. Newstetter}, Booktitle = {Proceedings of Conference on Cooperative Buildings (CoBuild)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://link.springer.de/link/service/series/0558/bibs/1370/13700154.htm}, Keywords = {Aware Home; Intelligent Environments}, Pages = {191--198}, Title = {The Aware Home: A Living Laboratory for Ubiquitous Computing Research}, Topic = {Aware Home; Intelligent Environments}, Year = {1999}} @inproceedings{Kim:2005:Video-based-nonphotorealistic, Author = {B. Kim and I. 
Essa}, Booktitle = {Proceedings of Computer Graphics International (CGI)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1109/CGI.2005.1500363}, Keywords = {Nonphotorealistic Rendering; Image-based Rendering; Computational Photography & Video}, Pages = {32--35}, Pdf = {http://ieeexplore.ieee.org/iel5/10021/32165/01500363.pdf?tp=&arnumber=1500363&isnumber=32165}, Publisher = {IEEE Computer Society Press}, Title = {Video-based nonphotorealistic and expressive illustration of motion}, Topic = {Nonphotorealistic Rendering; Image-based Rendering; Computational Photography & Video}, Year = {2005}, Bdsk-Url-1 = {http://dx.doi.org/10.1109/CGI.2005.1500363%20}, Bdsk-Url-2 = {http://dx.doi.org/10.1109/CGI.2005.1500363}} @inproceedings{Kim:2006:Interactive-Mosaic, Abstract = {Navigation through large multimedia collections that include videos and images still remains cumbersome. In this paper, we introduce a novel method to visualize and navigate through the collection by creating a mosaic image that visually represents the compilation. This image is generated by a labeling-based layout algorithm using various sizes of sample tile images from the collection. Each tile represents both the photographs and video files representing scenes selected by matching algorithms. This generated mosaic image provides a new way for thematic video and visually summarizes the videos. Users can generate these mosaics with some predefined themes and layouts, or base it on the results of their queries. Our approach supports automatic generation of these layouts by using meta-information such as color, time-line and existence of faces or manually generated annotated information from existing systems (e.g., the Family Video Archive).}, Address = {Santa Barbara, CA, USA}, Annotate = {Acceptance rate: 35 percent}, Author = {K. Kim and I. Essa and G. 
Abowd}, Booktitle = {Proceedings of ACM International Conference on Multimedia}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computational Photography & Video; Multimedia}, Month = {October}, Pdf = {http://cpl.cc.gatech.edu/projects/InteractiveMosaic/sp48-kim.pdf}, Title = {Interactive Mosaic Generation for Video Navigation}, Topic = {Computational Photography & Video; Multimedia}, Year = {2006}} @article{Kwatra:2005:Texture-Optimization, Abstract = { We present a novel technique for texture synthesis using optimization. We define a Markov Random Field (MRF)-based similarity metric for measuring the quality of synthesized texture with respect to a given input sample. This allows us to formulate the synthesis problem as minimization of an energy function, which is optimized using an Expectation Maximization (EM)-like algorithm. In contrast to most example-based techniques that do region-growing, ours is a joint optimization approach that progressively refines the entire texture. Additionally, our approach is ideally suited to allow for controllable synthesis of textures. Specifically, we demonstrate controllability by animating image textures using flow fields. We allow for general two-dimensional flow fields that may dynamically change over time. Applications of this technique include dynamic texturing of fluid animations and texture-based flow visualization. }, Acm = {http://portal.acm.org/citation.cfm?id=1073263}, Anotate = {21 percent}, Author = {V. Kwatra and I. Essa and A. Bobick and N. 
Kwatra}, Date-Modified = {2008-09-03 16:30:58 -0400}, Doi = {http://dx.doi.org/10.1145/1073204.1073263}, Issn = {0730-0301}, Journal = {ACM SIGGRAPH Proceedings of Annual Conference on Computer graphics and interactive techniques}, Keywords = {Computational Photography & Video; Texture Synthesis; Video Textures; Animation}, Month = {August}, Note = {Appears in ACM Transactions on Graphics (TOG)}, Number = {3}, Pages = {795--802}, Pdf = {http://www.cc.gatech.edu/gvu/perception/projects/textureoptimization/TO-final.pdf}, Title = {Texture Optimization for Example-based Synthesis}, Topic = {Computational Photography & Video; Texture Synthesis; Video Textures; Animation}, Url = {http://www.cc.gatech.edu/cpl/projects/textureoptimization/}, Volume = {24}, Year = {2005}, Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/textureoptimization/}, Bdsk-Url-2 = {http://dx.doi.org/10.1145/1073204.1073263}} @article{Kwatra:2003:Graphcut-Textures:, Abstract = { In this paper we introduce a new algorithm for image and video texture synthesis. In our approach, patch regions from a sample image or video are transformed and copied to the output and then stitched together along optimal seams to generate a new (and typically larger) output. In contrast to other techniques, the size of the patch is not chosen a-priori, but instead a graph cut technique is used to determine the optimal patch region for any given offset between the input and output texture. Unlike dynamic programming, our graph cut technique for seam optimization is applicable in any dimension. We specifically explore it in 2D and 3D to perform video texture synthesis in addition to regular image synthesis. We present approximative offset search techniques that work well in conjunction with the presented patch size optimization. We show results for synthesizing regular, random, and natural images and videos. We also demonstrate how this method can be used to interactively merge different images to generate new scenes. 
}, Acm = {http://portal.acm.org/citation.cfm?id=882264&coll=&dl=ACM&CFID=15151515&CFTOKEN=6184618}, Anotate = {19 percent}, Author = {V. Kwatra and A. Sch{\"o}dl and I. Essa and G. Turk and A. Bobick}, Date-Modified = {2008-09-03 16:31:07 -0400}, Doi = {http://dx.doi.org/10.1145/882262.882264}, Journal = {ACM SIGGRAPH Proceedings of Annual Conference on Computer graphics and interactive techniques}, Keywords = {Computational Photography & Video; Texture Synthesis; Video Textures; Animation}, Month = {July}, Note = {Appears in ACM Transactions on Graphics (TOG)}, Number = {3}, Pages = {277--286}, Pdf = {http://www.cc.gatech.edu/gvu/perception/projects/graphcuttextures/gc-final.pdf}, Title = {Graphcut Textures: Image and Video Synthesis Using Graph Cuts}, Topic = {Computational Photography & Video; Texture Synthesis; Video Textures; Animation}, Url = {http://www.cc.gatech.edu/cpl/projects/graphcuttextures/}, Volume = {22}, Year = {2003}, Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/graphcuttextures/}, Bdsk-Url-2 = {http://dx.doi.org/10.1145/882262.882264}} @inproceedings{Minnen:2003:Expectation-Grammars:, Abstract = {Video-based recognition and prediction of a temporally extended activity can benefit from a detailed description of high-level expectations about the activity. Stochastic grammars allow for an efficient representation of such expectations and are well-suited for the specification of temporally well-ordered activities. In this paper, we extend stochastic grammars by adding event parameters, state checks, and sensitivity to an internal scene model. We present an implemented system that uses human-specified grammars to recognize a person performing the Towers of Hanoi task from a video sequence by analyzing object interaction events. Experimental results from several videos show robust recognition of the full task and its constituent sub-tasks even though no appearance models of the objects in the video are provided. 
These experiments include videos of the task performed with different shaped objects and with distracting and extraneous interactions.}, Annotate = {Acceptance rate: (60 oral + 149 poster)/(905 submissions) = 23 percent}, Author = {D. Minnen and I. Essa and T. Starner}, Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, City = {Madison, WI}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; Computer Vision}, Month = {June}, Pages = {626--632}, Pdf = {http://www.gvu.gatech.edu/ccg/publications/minnen-cvpr2003.pdf}, Title = {Expectation Grammars: Leveraging High-Level Expectations for Activity Recognition}, Topic = {Activity Recognition; Computer Vision}, Year = {2003}} @inproceedings{Minnen:2007:Discovering-Multivariate, Annotate = {Accepted for oral presentation. 253 papers accepted of 921 valid submissions (27 percent)}, Author = {D. Minnen and C. Isbell and I. Essa and T. Starner}, Booktitle = {Proceedings of AAAI: American Association of Artificial Intelligence Conference}, City = {Vancouver, Canada}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; AI; Machine Learning}, Month = {July}, Title = {Discovering Multivariate Motifs using Subsequence Density Estimation}, Topic = {Activity Recognition; AI; Machine Learning}, Year = {2007}} @inproceedings{Minnen:2006:Discovering-Characteristic, Abstract = {We present an approach to activity discovery, the unsupervised identification and modeling of human actions embedded in a larger sensor stream. Activity discovery can be seen as the inverse of the activity recognition problem. Rather than learn models from hand-labeled sequences, we attempt to discover motifs, sets of similar subsequences within the raw sensor stream, without the benefit of labels or manual segmentation. These motifs are statistically unlikely and thus typically correspond to important or characteristic actions within the activity. 
The problem of activity discovery differs from typical motif discovery, such as locating protein binding sites, because of the nature of time series data representing human activity. For example, in activity data, motifs will tend to be sparsely distributed, vary in length, and may only exhibit intra-motif similarity after appropriate time warping. In this paper, we motivate the activity discovery problem and present our approach for efficient discovery of meaningful actions from sensor data representing human activity. We empirically evaluate the approach on an exercise data set captured by a wrist-mounted, three-axis inertial sensor. Our algorithm successfully discovers motifs that correspond to the real exercises with a recall rate of 96.3 percent and overall accuracy of 86.7 percent over six exercises and 864 occurrences.}, Author = {D. Minnen and T. Starner and I. Essa and C. Isbell}, Booktitle = {Proceedings of IEEE International Symposium on Wearable Computers (ISWC)}, City = {Montreux, Switzerland}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {10.1109/ISWC.2006.286337}, Keywords = {Activity Recognition; Mobile Computing}, Month = {October}, Organization = {IEEE Computer Society}, Pages = {11--18}, Publisher = {IEEE Computer Society Press}, Read = {Yes}, Title = {Discovering Characteristic Actions from On-Body Sensor Data}, Topic = {Activity Recognition; Mobile Computing}, Year = {2006}, Bdsk-Url-1 = {http://ieeexplore.ieee.org/search/srchabstract.jsp?arnumber=4067720&isnumber=4067708&punumber=4067707&k2dockey=4067720@ieeecnfs&query=(discovering+characteristic+actions+from+on-body+sensor+data)%3Cin%3Emetadata&pos=0}, Bdsk-Url-2 = {http://dx.doi.org/10.1109/ISWC.2006.286337}} @inproceedings{Minnen:2006:Activity-Discovery:, Author = {D. Minnen and T. Starner and I. Essa and C. 
Isbell}, Booktitle = {Proceedings of The Learning Workshop at Snowbird}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; Machine Learning}, Month = {April}, Pdf = {http://www-static.cc.gatech.edu/gvu/ccg//people/david/papers/minnen-snowbird2006.pdf}, Title = {Activity Discovery: Sparse Motifs from Multivariate Time Series}, Topic = {Activity Recognition; Machine Learning}, Year = {2006}} @inproceedings{Minnen:2007:Improving-Activity, Abstract = {A fundamental problem for artificial intelligence is identifying perceptual primitives from raw sensory signals that are useful for higher-level reasoning. We equate these primitives with initially unknown recurring patterns known as motifs. Autonomously learning the motifs is difficult because their number, location, length, and shape are all unknown. Furthermore, nonlinear temporal warping may be required to ensure the similarity of motif occurrences. In this paper, we extend a leading motif discovery algorithm by allowing it to operate on multidimensional sensor data, incorporating automatic parameter estimation, and providing for motif-specific similarity adaptation. We evaluate our algorithm on several data sets and show how our approach leads to faster real world discovery and more accurate motifs compared to other leading methods.}, Annotate = {470 papers accepted of 1353 valid submissions (34 percent).}, Author = {D. Minnen and T. Starner and I. Essa and C. Isbell}, Booktitle = {Proceedings of International Joint Conference on Artificial Intelligence (IJCAI)}, City = {Hyderabad, India}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; AI; Machine Learning}, Month = {January}, Title = {Improving Activity Discovery with Automatic Neighborhood Estimation}, Topic = {Activity Recognition; AI; Machine Learning}, Year = {2007}} @inproceedings{Minnen:2007:Discovering-Variable-Length, Author = {D. Minnen and T. Starner and I. Essa and C. 
Isbell}, Booktitle = {Proceedings of International Conference on Knowledge Discovery and Data Mining (KDDM)}, City = {San Jose, CA}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; Data Mining}, Month = {August}, Title = {Discovering Variable-Length Motifs in Multivariate Data Streams using Bayesian Surprise}, Topic = {Activity Recognition; Data Mining}, Year = {2007}} @inproceedings{Moore:2002:Recognizing-Multitasked, Address = {Edmonton, Alberta, Canada}, Author = {D. Moore and I. Essa}, Booktitle = {Proceedings of AAAI: American Association of Artificial Intelligence Conference}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; AI; Computer Vision}, Organization = {AAAI}, Title = {Recognizing Multitasked Activities from Video using Stochastic Context-Free Grammar}, Topic = {Activity Recognition; AI; Computer Vision}, Year = {2002}} @techreport{Moore:1998:Context-Management, Author = {D. Moore and I. Essa and M. Hayes}, Date-Modified = {2008-09-03 16:26:03 -0400}, Institution = {Georgia Institute of Technology, Graphics, Visualization, and Usability Center}, Keywords = {Activity Recognition; Computer Vision}, Number = {GIT-GVU-98-26}, Title = {Context Management for Human Activity Recognition}, Topic = {Activity Recognition; Computer Vision}, Url = {http://www.gvu.gatech.edu/}, Year = {1998}, Bdsk-Url-1 = {http://www.gvu.gatech.edu/}} @inproceedings{Moore:1999:Exploiting-Human, Address = {Corfu, Greece}, Author = {D. Moore and I. Essa and M. 
Hayes}, Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Computer Vision; Activity Recognition}, Organization = {IEEE Computer Society}, Pages = {80--86}, Title = {Exploiting Human Actions and Object Context for Recognition Tasks}, Topic = {Computer Vision; Activity Recognition}, Year = {1999}} @inproceedings{Moore:1999:Context-Management, Address = {Washington, DC, USA}, Author = {D. Moore and I. Essa and M. Hayes}, Booktitle = {Proceedings of Audio and Vision-based Person Authentication (AVBPA)}, Date-Modified = {2008-09-03 16:27:48 -0400}, Keywords = {Activity Recognition; Computer Vision}, Title = {Context Management for Human Activity Recognition}, Topic = {Activity Recognition; Computer Vision}, Year = {1999}} @inproceedings{Mynatt:2000:Increasing-the-Opportunities, Acm = {http://doi.acm.org/10.1145/355460.355475}, Author = {E. Mynatt and I. Essa and W. Rogers}, Booktitle = {Proceedings of ACM Conference on Universal Usability (CUU)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Doi = {http://dx.doi.org/10.1145/355460.355475}, Html = {http://www.awarehome.gatech.edu}, Keywords = {Aging-in-Place; Aware Home; HCI}, Month = {November}, Pdf = {http://www-static.cc.gatech.edu/fce/ahri/publications/agingInPlace-cuu2000.pdf}, Title = {Increasing the Opportunities for Aging in Place}, Topic = {Aging-in-Place; Aware Home; HCI}, Year = {2000}, Bdsk-Url-1 = {http://dx.doi.org/10.1145/355460.355475}} @inproceedings{Padoy:2007:A-Boosted-Segmentation, Address = {Brisbane, Australia}, Annotate = {Accepted 237 of 637 submitted papers (37 percent) ORAL presentation.}, Author = {N. Padoy and T. Blum and I. Essa and H. Feussner and M. O. Berger and N. 
Navab}, Booktitle = {Proceedings of International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI)}, Date-Modified = {2008-09-03 16:26:03 -0400}, Keywords = {Activity Recognition; Medical Imaging}, Publisher = {Springer Lecture Notes in Computer Science (LNCS) series}, Title = {A Boosted Segmentation Method for Surgical Workflow Analysis}, Topic = {Activity Recognition; Medical Imaging}, Year = {2007}} @inproceedings{Parry:2003:Rhythmic-Similarity, Abstract = {Rhythmic similarity techniques for audio tend to evaluate how close to identical two rhythms are. This paper proposes a similarity metric based on rhythmic elaboration that matches rhythms that share the same beats regardless of tempo or identicalness. Elaborations can help an application decide where to transition between songs. Potential applications include automatically generating a non-stop music mix or sonically browsing a music library.}, Address = {Baltimore, MD}, Annotate = {(23 long + 22 short + 8 poster)/(64 long + 14 poster) = 68 percent}, Author = {R. M. Parry and I. Essa}, Booktitle = {Proceedings of International Conference on Music Information Retrieval}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/elaboration.htm}, Keywords = {Audio/Music Analysis}, Month = {October}, Pages = {251--252}, Pdf = {http://ismir2003.ismir.net/papers/Parry.PDF}, Title = {Rhythmic Similarity through Elaboration}, Topic = {Audio/Music Analysis}, Year = {2003}} @inproceedings{Parry:2004:Feature-Weighting, Abstract = {This paper proposes the use of feature weights to reveal the hierarchical nature of music audio. Feature weighting has been exploited in machine learning, but has not been applied to music audio segmentation. We describe both a global and a local approach to automatic feature weighting. The global approach assigns a single weighting to all features in a song. 
The local approach uses the local separability directly. Both approaches reveal structure that is obscured by standard features, and emphasize segments of a particular size.}, Address = {Barcelona}, Anotate = {(27 long + 33 short + 44 poster)/(116 total) = 90 percent}, Author = {R. M. Parry and I. Essa}, Booktitle = {Proceedings of International Conference on Music Information Retrieval}, Date-Modified = {2008-09-03 16:26:03 -0400}, Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/segmentation.htm}, Keywords = {Audio/Music Analysis; Machine Learning}, Month = {October}, Pages = {116--119}, Pdf = {http://ismir2004.ismir.net/proceedings/p023-page-116-paper192.pdf}, Title = {Feature Weighting for Segmentation}, Topic = {Audio/Music Analysis; Machine Learning}, Year = {2004}} @inproceedings{Parry:2005:Blind-Source, Abstract = {Blind source separation algorithms typically involve decorrelating time-aligned mixture signals. The usual assumption is that all sources are active at all times. However, if this is not the case, we show that the unique pattern of source activity/inactivity helps separation. Music is the most obvious example of sources exhibiting repetitive structure because it is carefully constructed. We present a novel source separation algorithm based on spatial time-time distributions that capture the repetitive structure in audio. Our method outperforms time-frequency source separation when source spectra are highly overlapping.}, Address = {Madrid}, Author = {R. M. Parry and I. 
Essa},
  Booktitle = {Proceedings of International Conference on Digital Audio Effects},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/repstruc.htm},
  Keywords = {Audio/Music Analysis},
  Month = {September},
  Pages = {143--148},
  Pdf = {http://dafx05.ssr.upm.es/Proc_DAFx05/P_143.pdf},
  Title = {Blind Source Separation Using Repetitive Structure},
  Topic = {Audio/Music Analysis},
  Year = {2005}}

@inproceedings{Parry:2006:Source-Detection,
  Abstract = {Blind source separation algorithms typically require that the number of sources are known in advance. However, it is often the case that the number of sources could change over time and that the total number is not known. Existing source separation techniques require source number estimation methods to determine how many sources are active within the mixtures. These methods typically operate on the covariance matrix of mixture recordings and require fewer active sources than mixtures. When sources do not overlap in the time-frequency domain, more sources than mixtures may be detected and then separated. However, separating more sources than mixtures when sources overlap in time and frequency poses a particularly difficult problem. This paper addresses the issue of source detection when more sources than sensors overlap in time and frequency. We show that repetitive structure in the form of time-time correlation matrices can reveal when each source is active.},
  Address = {Toulouse, France},
  Annotate = {Acceptance rate: 1465 / 3045 = 48 percent},
  Author = {R. M. Parry and I.
Essa},
  Booktitle = {Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/repstruc.htm},
  Ieee = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?isnumber=34760&arnumber=1661163&count=305&index=280},
  Keywords = {Audio/Music Analysis},
  Month = {May},
  Pages = {1093--1096},
  Pdf = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/parry-essa-icassp06.pdf},
  Title = {Source Detection Using Repetitive Structure},
  Topic = {Audio/Music Analysis},
  Volume = {4},
  Year = {2006}}

@inproceedings{Parry:2006:Estimating-the-Spatial,
  Abstract = {One way of separating sources from a single mixture recording is by extracting spectral components and then combining them to form estimates of the sources. The grouping process remains a difficult problem. We propose, for instances when multiple mixture signals are available, clustering the components based on their relative contribution to each mixture (i.e., their spatial position). We introduce novel factorizations of magnitude spectrograms from multiple recordings and derive update rules that extend independent subspace analysis and non-negative matrix factorization to concurrently estimate the spectral shape, time envelope, and spatial position of each component. We show that estimated component positions are near the position of their corresponding source, and that multichannel non-negative matrix factorization can distinguish three pianos by their position in the mixture.},
  Address = {Charleston, SC},
  Annotate = {Acceptance rate: 45 oral / 183 total = 25 percent},
  Author = {R. M. Parry and I.
Essa},
  Booktitle = {Proceedings of International Conference on Independent Component Analysis and Blind Signal Separation},
  Date-Modified = {2008-09-03 16:28:39 -0400},
  Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/magspec.htm},
  Keywords = {Audio/Music Analysis},
  Month = {March},
  Pages = {666--673},
  Pdf = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/parry-essa-ica06.pdf},
  Publisher = {Springer},
  Series = {Lecture Notes in Computer Science (LNCS)},
  Title = {Estimating the Spatial Position of Spectral Components in Audio},
  Topic = {Audio/Music Analysis},
  Year = {2006}}

@inproceedings{Parry:2006:Spectrogram-Factorization,
  Abstract = {Spectrogram factorization methods have been proposed for single channel source separation including independent subspace analysis and non-negative matrix factorization. These methods assume that the mixture spectrogram is a linear combination of the source spectrograms. However, this is an incorrect assumption because the mixture spectrogram additionally depends on the (unknown) phase of the sources. This paper investigates the role of phase in estimating the source spectrograms from the mixture spectrogram and incorporates a probabilistic representation of phase to improve separation results.},
  Address = {Whistler, Canada},
  Author = {R. M. Parry and I. Essa},
  Booktitle = {Proceedings of Neural Information Processing Systems: Workshop on Advances in Models for Acoustic Processing},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/magspec.htm},
  Keywords = {Audio/Music Analysis; Machine Learning},
  Month = {December},
  Pdf = {http://www.idiap.ch/~barber/amac/parry-nipswkshp06.pdf},
  Title = {Spectrogram Factorization Using Phase Information},
  Topic = {Audio/Music Analysis; Machine Learning},
  Year = {2006}}

@inproceedings{Parry:2007:Incorporating-Phase,
  Abstract = {Spectrogram factorization methods have been proposed for single channel source separation and audio analysis.
Typically, the mixture signal is first converted into a time-frequency representation such as the short-time Fourier transform (STFT). The phase information is thrown away and this spectrogram matrix is then factored into the sum of rank-one source spectrograms. This approach incorrectly assumes the mixture spectrogram is the sum of the source spectrograms. In fact, the mixture spectrogram depends on the phase of the source STFTs. We investigate the consequences of this common assumption and introduce an approach that leverages a probabilistic representation of phase to improve the separation results.},
  Address = {Honolulu, Hawaii},
  Annotate = {Acceptance rate: 1344 / 2912 = 46 percent},
  Author = {R. M. Parry and I. Essa},
  Booktitle = {Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/magspec.htm},
  Ieee = {na},
  Keywords = {Audio/Music Analysis},
  Month = {April},
  Pdf = {http://www.gvu.gatech.edu/cpl/projects/musicaudio/parry-essa-icassp07.pdf},
  Title = {Incorporating Phase Information for Source Separation via Spectrogram Factorization},
  Topic = {Audio/Music Analysis},
  Year = {2007}}

@inproceedings{Parry:2007:Phase-Aware-Non-negative,
  Abstract = {Non-negative spectrogram factorization has been proposed for single-channel source separation and transcription. These methods operate on the magnitude or power spectrogram of the input mixture and estimate the magnitude or power spectrogram of source components. The usual assumption is that the mixture spectrogram is the sum of the source components. However, this relationship additionally depends on the unknown phase of the sources. Using a probabilistic representation of phase we derive a cost function that incorporates this uncertainty.
We compare this cost function against four standard approaches for a variety of spectrogram sizes, numbers of components, and component distributions. This phase-aware cost function reduces the estimation error but is more affected by detection errors.},
  Address = {London},
  Author = {R. M. Parry and I. Essa},
  Booktitle = {Proceedings of International Conference on Independent Component Analysis and Blind Signal Separation},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Audio/Music Analysis},
  Month = {September},
  Title = {Phase-Aware Non-negative Spectrogram Factorization},
  Topic = {Audio/Music Analysis},
  Year = {2007}}

@inproceedings{Pentland:1994:Visually-guided,
  Author = {A. Pentland and T. Darrell and I. Essa and A. Azarbayejani and S. Sclaroff},
  Booktitle = {Proceedings of Computer Animation Conference},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1109/CA.1994.324000},
  Keywords = {Computer Animation; Modeling},
  Organization = {IEEE Computer Society},
  Pages = {76--83},
  Pdf = {http://ieeexplore.ieee.org/iel2/987/7718/00324000.pdf?tp=&arnumber=324000&isnumber=7718},
  Publisher = {IEEE Computer Society Press},
  Title = {Visually guided animation},
  Topic = {Computer Animation; Modeling},
  Year = {1994},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/CA.1994.324000}}

@article{Pentland:1990:The-Thingworld-Modeling,
  Abstract = {We describe a real-time solid modeling system that is based on the physical analogy of forming clay by applying forces. The system is implemented by simulating real materials as they react to user-supplied forces. Unlike other physically-based modeling approaches, the Thingworld system allows the user to restrict forming action to simple global deformations during the initial roughing-in phase of modeling, and then later concern themselves with detailing. The Thingworld system also allows users to automatically model existing objects by using measurements taken from the object's surface.
These measurements are used to generate artificial forces that mold the computer model much as a human would mold a clay model. Timed examples for constructing solid models are shown.},
  Author = {A. Pentland and I. Essa and M. Friedmann and B. Horowitz and S. E. Sclaroff},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Journal = {ACM SIGGRAPH Proceedings of Symposium on Interactive 3D Graphics (I3DG)},
  Keywords = {Computer Animation; Modeling},
  Number = {2},
  Pages = {143--144},
  Read = {Yes},
  Title = {The Thingworld Modeling System: Virtual Sculpting by Modal Forces},
  Topic = {Computer Animation; Modeling},
  Volume = {24},
  Year = {1990},
  Bdsk-Url-1 = {http://doi.acm.org/10.1145/91394.91434}}

@inproceedings{Pentland:1994:Visually-guided-1,
  Abstract = {We survey research at the M.I.T. Media Laboratory concerned with accurately modeling, tracking, and interacting with people. Applications include computer animation, user interfaces, and video understanding.},
  Address = {Pacific Grove, CA},
  Author = {A. Pentland and S. Sclaroff and T. Darrell and I. Essa and A. Azarbayejani and T. Starner},
  Booktitle = {Proceedings of Asilomar Conference on Signals, Systems, and Computers},
  Date-Modified = {2009-09-15 18:28:33 -0400},
  Doi = {10.1109/ACSSC.1994.471666},
  Keywords = {Animation; Computer Vision; Multimedia},
  Pages = {1287--1291},
  Pdf = {http://ieeexplore.ieee.org/iel2/3310/9942/00471666.pdf?tp=&arnumber=471666&isnumber=9942},
  Title = {Visually guided interaction and animation},
  Topic = {Animation; Computer Vision; Multimedia},
  Volume = {2},
  Year = {1994},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/ACSSC.1994.471666}}

@inproceedings{Reveret:2001:Visual-Tracking,
  Author = {L. Reveret and I.
Essa},
  Booktitle = {Proceedings of IEEE Workshop on Cues in Communication},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Face & Gesture; Multimodal Interfaces; Computer Vision; Animation},
  Month = {December},
  Note = {(held in Conjunction with CVPR 2001)},
  Publisher = {IEEE Computer Society},
  Title = {Visual Tracking and Coding of Speech Related Facial Actions},
  Topic = {Face & Gesture; Multimodal Interfaces; Computer Vision; Animation},
  Year = {2001}}

@article{Rogers:2007:Designing-a-Technology,
  Abstract = {Technology in the home has the potential to support older adults in a variety of ways. The success of such technology depends on understanding the needs and capabilities of the user and on developing the technology to provide seamless and appropriate support. Our goal was to develop a technology ``coach'' that could support older adults in learning to use a medical device -- in this case, a blood glucose meter. Our approach was interdisciplinary: It involved human factors/ergonomics (HF/E) researchers with expertise in cognitive psychology and a computer scientist with expertise in computer vision. Based on our analysis of user capabilities and task demands, we developed a computer vision system that could noninvasively observe, track, recognize, and interpret a person's interaction with the meter. We assessed the relative benefits of different feedback types to correct errors. This research illustrates the potential for the development of in-home personal assistants and the necessity for interdisciplinary approaches to the design of ``smart'' home technologies.},
  Author = {W. Rogers and I. Essa and A.
Fisk}, Date-Modified = {2008-09-03 16:26:03 -0400}, Journal = {Ergonomics in Design, Journal of the Human Factors and Ergonomics Society}, Keywords = {Engineering Psychology; Human Factors; Aware Home; Aging-in-Place}, Title = {Designing a Technology Coach}, Topic = {Engineering Psychology; Human Factors; Aware Home; Aging-in-Place}, Year = {2007}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAaIAAAAAAaIAAgAABWlyZmFuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCRAAB/////xlSb2dlcnMtRXNzYS1GaXNrLTIwMDcucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAA/////wAAEgFjdQAAAAAAAAAAAAAAAAAGcGFwZXJzAAIAOS86Vm9sdW1lczppcmZhbjp3d3c6cHViczpwYXBlcnM6Um9nZXJzLUVzc2EtRmlzay0yMDA3LnBkZgAADgA0ABkAUgBvAGcAZQByAHMALQBFAHMAcwBhAC0ARgBpAHMAawAtADIAMAAwADcALgBwAGQAZgAPAAwABQBpAHIAZgBhAG4AEgAqL3d3dy9wdWJzL3BhcGVycy9Sb2dlcnMtRXNzYS1GaXNrLTIwMDcucGRmABMADi9Wb2x1bWVzL2lyZmFuAAkANAA0Y2lmcwAAAQAAAHNtYjovL2lyZmFuQGNjc2FtYmEuY2MuZ2F0ZWNoLmVkdS9pcmZhbgD//wAAgAXSHB0eH1gkY2xhc3Nlc1okY2xhc3NuYW1lox8gIV1OU011dGFibGVEYXRhVk5TRGF0YVhOU09iamVjdF8QQy4uLy4uLy4uLy4uL1ZvbHVtZXMvaXJmYW4vd3d3L3B1YnMvcGFwZXJzL1JvZ2Vycy1Fc3NhLUZpc2stMjAwNy5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKACRgJIAk0CVgJhAmUCcwJ6AoMCyQLOAtEC3gLjAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAAvU=}} @inproceedings{Ruddarraju:2003:Fast-Multiple, Address = {Halifax, Canada}, Author = {R. Ruddarraju and A. Haro and I. 
Essa},
  Booktitle = {Proceedings of International Conference on Vision Interface (VI)},
  Date-Modified = {2009-10-19 17:38:49 -0400},
  Keywords = {Multimodal Interfaces; Vision for HCI; HCI},
  Title = {Fast Multiple Camera Head Pose Tracking},
  Topic = {Multimodal Interfaces; Vision for HCI; HCI},
  Year = {2003}}

@inproceedings{Ruddarraju:2003:Perceptual-User,
  Address = {Vancouver, Canada},
  Annotate = {34 percent},
  Author = {R. Ruddarraju and A. Haro and K. Nagel and Q. Tran and I. Essa and G. Abowd and E. Mynatt},
  Booktitle = {Proceedings of International Conference on Multimodal Interfaces (ICMI)},
  Date-Modified = {2009-10-19 17:39:03 -0400},
  Keywords = {Multimodal Interfaces; HCI; Vision for HCI},
  Title = {Perceptual User Interfaces using Vision-Based Eye Tracking},
  Topic = {Multimodal Interfaces; HCI; Vision for HCI},
  Year = {2003}}

@inproceedings{Schodl:2001:Depth-layers,
  Address = {Kauai, Hawaii, USA},
  Author = {A. Sch{\"o}dl and I. Essa},
  Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1109/CVPR.2001.990534},
  Keywords = {Image-based Modeling; Computer Vision},
  Organization = {IEEE Computer Society},
  Pages = {639--644},
  Pdf = {http://ieeexplore.ieee.org/iel5/7768/21353/00990534.pdf?tp=&arnumber=990534&isnumber=21353},
  Publisher = {IEEE Computer Society Press},
  Title = {Depth layers from occlusions},
  Topic = {Image-based Modeling; Computer Vision},
  Volume = {1},
  Year = {2001},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/CVPR.2001.990534}}

@inproceedings{Schodl:1998:Head-Tracking,
  Address = {San Francisco, CA, USA},
  Author = {A. Sch{\"o}dl and A. Haro and I.
Essa},
  Booktitle = {Proceedings of Workshop on Perceptual User Interfaces (PUI)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Face & Gesture; Vision for HCI},
  Note = {In Conjunction with UIST 1998},
  Title = {Head Tracking using a Textured Polygonal Model},
  Topic = {Face & Gesture; Vision for HCI},
  Year = {1998}}

@inproceedings{Schodl:2000:Video-textures,
  Address = {New York, NY, USA},
  Author = {A. Sch{\"o}dl and R. Szeliski and D. H. Salesin and I. Essa},
  Booktitle = {ACM SIGGRAPH Proceedings of Annual Conference on Computer graphics and interactive techniques},
  Date-Modified = {2008-09-03 16:30:47 -0400},
  Doi = {10.1145/344779.345012},
  Isbn = {1-58113-208-5},
  Keywords = {Computational Photography & Video; Image-based Rendering; Animation; Video Textures; Texture Synthesis},
  Pages = {489--498},
  Publisher = {ACM Press/Addison-Wesley Publishing Co.},
  Title = {Video textures},
  Topic = {Computational Photography & Video; Image-based Rendering; Animation; Video Textures; Texture Synthesis},
  Year = {2000},
  Bdsk-Url-1 = {http://doi.acm.org/10.1145/344779.345012}}

@inproceedings{Schodl:2000:Machine-Learning,
  Author = {A. Sch{\"o}dl and I. Essa},
  Booktitle = {Proceedings of Conference on Advances in Neural Information Processing Systems (NIPS)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Computational Photography & Video; Machine Learning; Computer Animation},
  Pages = {1002--1008},
  Title = {Machine Learning for Video-Based Rendering},
  Topic = {Computational Photography & Video; Machine Learning; Computer Animation},
  Url = {http://nips.djvuzone.org/djvu/nips04/0209.djvu},
  Year = {2000},
  Bdsk-Url-1 = {http://nips.djvuzone.org/djvu/nips04/0209.djvu}}

@inproceedings{Schodl:2002:Controlled-Animation,
  Address = {San Antonio, TX, USA},
  Author = {A. Sch{\"o}dl and I.
Essa},
  Booktitle = {Proceedings of ACM Symposium on Computer Animation (SCA)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Computational Photography & Video; Animation; Video Textures},
  Organization = {ACM SIGGRAPH},
  Publisher = {ACM Press},
  Title = {Controlled Animation of Video Sprites},
  Topic = {Computational Photography & Video; Animation; Video Textures},
  Year = {2002}}

@inproceedings{Schodl:1999:Adaptive-Parallelization,
  Author = {A. Sch{\"o}dl and K. Schwan and I. Essa},
  Booktitle = {Proceedings of International Conference on Parallel and Distributed Processing Techniques and Applications (PDPTA)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Isbn = {1-892512-44-0},
  Keywords = {Face & Gesture; Multimodal Interfaces; Systems},
  Pages = {1571--1577},
  Title = {Adaptive Parallelization of Model-Based Head Tracking},
  Topic = {Face & Gesture; Multimodal Interfaces; Systems},
  Year = {1999}}

@inproceedings{Sclaroff:1992:Vision-based-Modeling:,
  Author = {S. Sclaroff and I. Essa and A. Pentland},
  Booktitle = {Proceedings of Eurographics Workshop on Animations and Simulations},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Animation; Computer Vision; Image-based Modeling},
  Title = {Vision-based Modeling: An application of a Unified Approach for Physical and Geometric Modeling for Graphics and Animation},
  Topic = {Animation; Computer Vision; Image-based Modeling},
  Year = {1992}}

@inproceedings{Shi:2006:Learning-Temporal,
  Abstract = {Graphical models are often used to represent and recognize activities. Purely unsupervised methods (such as HMMs) can be trained automatically but yield models whose internal structure - the nodes - are difficult to interpret semantically. Manually constructed networks typically have nodes corresponding to sub-events, but the programming and training of these networks is tedious and requires extensive domain expertise.
In this paper, we propose a semi-supervised approach in which a manually structured Propagation Network (a form of a DBN) is initialized from a small amount of fully annotated data, and then refined by an EM-based learning method in an unsupervised fashion. During node refinement (the M step) a boosting-based algorithm is employed to train the evidence detectors of individual nodes. Experiments on a variety of data types - vision and inertial measurements - in several tasks demonstrate the ability to learn from as little as one fully annotated example accompanied by a small number of positive but non-annotated training examples. The system is applied to both recognition and anomaly detection tasks.},
  Author = {Y. Shi and A. Bobick and I. Essa},
  Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1109/CVPR.2006.174},
  Keywords = {Activity Recognition; Computer Vision; Machine Learning},
  Month = {June},
  Pages = {1631--1638},
  Pdf = {http://www.cc.gatech.edu/~monsoon/PropagationNet/SemiLearning.pdf},
  Publisher = {IEEE Computer Society},
  Title = {Learning Temporal Sequence Model from Partially Labeled Data},
  Topic = {Activity Recognition; Computer Vision; Machine Learning},
  Year = {2006},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/CVPR.2006.174}}

@inproceedings{Shi:2004:Propagation-Networks,
  Abstract = {We present Propagation Networks (P-Nets), a novel approach for representing and recognizing sequential activities that include parallel streams of action. We represent each activity using partially ordered intervals. Each interval is restricted by both temporal and logical constraints, including information about its duration and its temporal relationship with other intervals. P-Nets associate one node with each temporal interval. Each node is triggered according to a probability density function that depends on the state of its parent nodes.
Each node also has an associated observation function that characterizes supporting perceptual evidence. To facilitate realtime analysis, we introduce a particle filter framework to explore the conditional state space. We modify the original Condensation algorithm to more efficiently sample a discrete state space (D-Condensation). Experiments in the domain of blood glucose monitor calibration demonstrate both the representational power of P-Nets and the effectiveness of the D-Condensation algorithm.},
  Address = {Washington, DC},
  Annotate = {17 percent},
  Author = {Y. Shi and Y. Huang and D. Minnen and A. Bobick and I. Essa},
  Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Activity Recognition; Machine Learning; Computer Vision},
  Pages = {862--869},
  Publisher = {IEEE Computer Society},
  Title = {Propagation Networks for recognition of partially ordered sequential action},
  Topic = {Activity Recognition; Machine Learning; Computer Vision},
  Url = {http://www.cc.gatech.edu/cpl/projects/monsoon/PropagationNet/PropagationNet.htm},
  Year = {2004},
  Bdsk-Url-1 = {http://www.cc.gatech.edu/cpl/projects/monsoon/PropagationNet/PropagationNet.htm}}

@inproceedings{Steedly:2001:Propagation-of-Innovative,
  Address = {Vancouver, Canada},
  Author = {D. Steedly and I.
Essa},
  Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1109/ICCV.2001.937628},
  Keywords = {Image-based Modeling; Structure from Motion; Computer Vision},
  Pages = {223--229},
  Pdf = {http://ieeexplore.ieee.org/iel5/7460/20294/00937628.pdf?tp=&arnumber=937628&isnumber=20294},
  Title = {Propagation of Innovative Information in {Non-Linear} {Least-Squares} Structure from Motion},
  Topic = {Image-based Modeling; Structure from Motion; Computer Vision},
  Volume = {2},
  Year = {2001},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICCV.2001.937628}}

@inproceedings{Steedly:2003:Spectral-Partitioning,
  Address = {Nice, France},
  Author = {D. Steedly and I. Essa and F. Dellaert},
  Booktitle = {Proceedings of IEEE International Conference on Computer Vision (ICCV)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Computer Vision; Image-based Modeling; Structure from Motion},
  Pages = {996--1003},
  Pdf = {http://ieeexplore.ieee.org/iel5/8769/27772/01238457.pdf?tp=&arnumber=1238457&isnumber=27772},
  Publisher = {IEEE Computer Society},
  Title = {Spectral Partitioning for Structure from Motion},
  Topic = {Computer Vision; Image-based Modeling; Structure from Motion},
  Year = {2003}}

@inproceedings{Stillman:2001:Towards-Reliable,
  Acm = {http://doi.acm.org/10.1145/971478.971499},
  Author = {S. Stillman and I.
Essa},
  Booktitle = {Proceedings of Workshop on Perceptual User Interfaces (PUI)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1145/971478.971499},
  Keywords = {Audio-Visual Tracking; Visual Tracking; Multimodal Interfaces; HCI},
  Note = {Held in Conjunction with ACM UIST 2001 Conference},
  Publisher = {ACM Press},
  Title = {Towards Reliable Multimodal Sensing in Aware Environments},
  Topic = {Audio-Visual Tracking; Visual Tracking; Multimodal Interfaces; HCI},
  Year = {2001},
  Bdsk-Url-1 = {http://dx.doi.org/10.1145/971478.971499}}

@inproceedings{Stillman:1999:A-system-for-tracking,
  Address = {Washington, DC, USA},
  Author = {S. Stillman and R. Tanawongsuwan and I. Essa},
  Booktitle = {Proceedings of Audio and Vision-based Person Authentication (AVBPA)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Keywords = {Aware Home; Intelligent Environments},
  Month = {March},
  Title = {A system for tracking and recognizing multiple people with multiple cameras},
  Topic = {Aware Home; Intelligent Environments},
  Year = {1999}}

@article{Sukel:2003:Presenting-Movement,
  Author = {K. E. Sukel and R. Catrambone and I. Essa and G. J. Brostow},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Journal = {International Journal of Human-Computer Interaction},
  Keywords = {Human Motion Analysis},
  Number = {3},
  Pages = {433--452},
  Pdf = {http://www-static.cc.gatech.edu/~brostow/research/ijhci.pdf},
  Title = {Presenting Movement in a Computer-Based Dance Tutor},
  Topic = {HCI; Human Motion Analysis},
  Volume = {15},
  Year = {2003}}

@techreport{Tunawongsuwan:1999:Robust-Tracking,
  Author = {R. Tanawongsuwan and A. Stoychev and I.
Essa},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Institution = {Georgia Institute of Technology, GVU Center},
  Keywords = {Computer Vision; Robotics; Visual Tracking},
  Note = {Available from http://www.gvu.gatech.edu/gvu/reports/1999/},
  Number = {99-19},
  Title = {Robust Tracking of People by a Mobile Robotic Agent},
  Topic = {Computer Vision; Robotics; Visual Tracking},
  Year = {1999}}

@inproceedings{Xu:2003:Mandatory-Human,
  Address = {Dallas, Texas, USA},
  Author = {J. Xu and R. Lipton and I. Essa and M. Sung and Y. Zhu},
  Booktitle = {Proceedings of International Conference on Computer Communications and Networks},
  Date-Modified = {2009-10-19 17:38:09 -0400},
  Doi = {10.1109/ICCCN.2003.1284222},
  Keywords = {Computational Photography & Video; Computer Security},
  Pdf = {http://ieeexplore.ieee.org/iel5/9031/28660/01284222.pdf?tp=&arnumber=1284222&isnumber=28660},
  Title = {Mandatory Human Participation: A New Authentication Scheme for Building Secure Systems},
  Topic = {Computer Security; Computational Photography & Video},
  Year = {2003},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICCCN.2003.1284222}}

@inproceedings{Yin:2007:Tree-based-Classifiers,
  Abstract = {This paper presents an algorithm for the automatic segmentation of monocular videos into foreground and background layers. Correct segmentations are produced even in the presence of large background motion with nearly stationary foreground. There are three key contributions. The first is the introduction of a novel motion representation, ``motons'', inspired by research in object recognition. Second, we propose learning the segmentation likelihood from the spatial context of motion. The learning is efficiently performed by Random Forests. The third contribution is a general taxonomy of tree-based classifiers, which facilitates theoretical and experimental comparisons of several known classification algorithms, as well as spawning new ones.
Diverse visual cues such as motion, motion context, colour, contrast and spatial priors are fused together by means of a Conditional Random Field (CRF) model. Segmentation is then achieved by binary min-cut. Our algorithm requires no initialization. Experiments on many video-chat type sequences demonstrate the effectiveness of our algorithm in a variety of scenes. The segmentation results are comparable to those obtained by stereo systems.},
  Address = {Minneapolis, MN, USA},
  Author = {P. Yin and A. Criminisi and J. Winn and I. Essa},
  Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://research.microsoft.com/vision/cambridge/i2i/},
  Keywords = {Computational Photography & Video; Computer Vision},
  Month = {June},
  Pages = {1--8},
  Publisher = {IEEE Computer Society},
  Title = {Tree-based Classifiers for Bilayer Video Segmentation},
  Topic = {Computational Photography & Video; Computer Vision},
  Year = {2007}}

@inproceedings{Yin:2003:Boosted-Audio-Visual,
  Abstract = {We propose a new approach for combining acoustic and visual measurements to aid in recognizing lip shapes of a person speaking. Our method relies on computing the maximum likelihoods of (a) HMM used to model phonemes from the acoustic signal, and (b) HMM used to model visual features motions from video. One significant addition in this work is the dynamic analysis with features selected by AdaBoost, on the basis of their discriminant ability. This form of integration, leading to boosted HMM, permits AdaBoost to find the best features first, and then uses HMM to exploit dynamic information inherent in the signal.},
  Address = {Nice, France},
  Annotate = {ORAL, 10 orals, and 20 posters 10/78=12.8 percent},
  Author = {P. Yin and I. Essa and J. M.
Rehg},
  Booktitle = {Proceedings of International Workshop on Analysis and Modeling of Faces and Gestures (AMFG)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Html = {http://www-static.cc.gatech.edu/gvu/perception//projects/speechreading/index.html},
  Ieee = {http://csdl.computer.org/comp/proceedings/amfg/2003/2010/00/20100068abs.htm},
  Keywords = {Face & Gesture; Speech Reading; Computer Vision},
  Month = {October},
  Note = {held in conjunction with IEEE ICCV 2003},
  Pages = {68--73},
  Pdf = {http://www-static.cc.gatech.edu/~pyin/pdf/BAVHMM.pdf},
  Title = {Boosted Audio-Visual HMM for Speech Reading},
  Topic = {Face & Gesture; Speech Reading; Computer Vision},
  Year = {2003}}

@inproceedings{Yin:2003:Boosted-Audio-Visual-A,
  Address = {Asilomar, CA, USA},
  Annotate = {invited paper},
  Author = {P. Yin and I. Essa and J. M. Rehg},
  Booktitle = {Proceedings of Asilomar Conference on Signals, Systems, and Computers},
  Date-Modified = {2008-09-03 16:28:00 -0400},
  Html = {http://www-static.cc.gatech.edu/gvu/perception//projects/speechreading/index.html},
  Keywords = {Face & Gesture; Speech Reading; Computer Vision},
  Month = {November},
  Pages = {2013--2018},
  Pdf = {http://www.cc.gatech.edu/~pyin/pdf/BAVHMM.pdf},
  Title = {Boosted Audio-Visual HMM for Speech Reading},
  Topic = {Face & Gesture; Speech Reading; Computer Vision},
  Year = {2003}}

@inproceedings{Yin:2004:Asymmetrically-Boosted,
  Abstract = {Speech reading, also known as lip reading, is aimed at extracting visual cues of lip and facial movements to aid in recognition of speech. The main hurdle for speech reading is that visual measurements of lip and facial motion lack information-rich features like the Mel frequency cepstral coefficients (MFCC), widely used in acoustic speech recognition. These MFCC are used with hidden Markov models (HMM) in most speech recognition systems at present. Speech reading could greatly benefit from automatic selection and formation of informative features from measurements in the visual domain.
These new features can then be used with HMM to capture the dynamics of lip movement and eventual recognition of lip shapes. Towards this end, we use AdaBoost methods for automatic visual feature formation. Specifically, we design an asymmetric variant of AdaBoost M2 algorithm to deal with the ill-posed multi-class sample distribution inherent in our problem. Our experiments show that the boosted HMM approach outperforms conventional AdaBoost and HMM classifiers. Our primary contributions are in the design of (a) boosted HMM and (b) asymmetric multi-class boosting.},
  Address = {Washington DC, USA},
  Author = {P. Yin and I. Essa and J. M. Rehg},
  Booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  Date-Modified = {2008-09-03 16:26:03 -0400},
  Doi = {10.1109/CVPR.2004.37},
  Html = {http://www-static.cc.gatech.edu/gvu/perception//projects/speechreading/index.html},
  Ieee = {http://doi.ieeecomputersociety.org/10.1109/CVPR.2004.37},
  Keywords = {Face & Gesture; Speech Reading; Computer Vision},
  Month = {June},
  Pages = {755--761},
  Pdf = {http://www-static.cc.gatech.edu/~pyin/pdf/aBHMMsr-Final.pdf},
  Publisher = {IEEE Computer Society},
  Title = {Asymmetrically Boosted HMM for Speech Reading},
  Topic = {Face & Gesture; Speech Reading; Computer Vision},
  Volume = {2},
  Year = {2004},
  Bdsk-Url-1 = {http://dx.doi.org/10.1109/CVPR.2004.37}}