A searchable list of some of my publications is below. You can also access them from the publication sites listed at the end of this page.
Publications:
Luke Drnach, J. L. Allen, Irfan Essa, Lena H. Ting
A Data-Driven Predictive Model of Individual-Specific Effects of FES on Human Gait Dynamics Proceedings Article
In: Proceedings International Conference on Robotics and Automation (ICRA), 2019.
Links | BibTeX | Tags: gait analysis, robotics
@inproceedings{2019-Drnach-DPMIEHGD,
title = {A Data-Driven Predictive Model of Individual-Specific Effects of FES on Human Gait Dynamics},
author = {Luke Drnach and J. L. Allen and Irfan Essa and Lena H. Ting},
url = {https://neuromechanicslab.emory.edu/documents/publications-docs/Drnach%20et%20al%20Data%20Driven%20Gait%20Model%20ICRA%202019.pdf},
doi = {10.1109/ICRA.2019.8794304},
year = {2019},
date = {2019-05-01},
urldate = {2019-05-01},
booktitle = {Proceedings International Conference on Robotics and Automation (ICRA)},
keywords = {gait analysis, robotics},
pubstate = {published},
tppubtype = {inproceedings}
}
Unaiza Ahsan, Rishi Madhok, Irfan Essa
Video Jigsaw: Unsupervised Learning of Spatiotemporal Context for Video Action Recognition Proceedings Article
In: IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 179-189, 2019, ISSN: 1550-5790.
Links | BibTeX | Tags: activity recognition, computer vision, machine learning, WACV
@inproceedings{2019-Ahsan-VJULSCVAR,
title = {Video Jigsaw: Unsupervised Learning of Spatiotemporal Context for Video Action Recognition},
author = {Unaiza Ahsan and Rishi Madhok and Irfan Essa},
url = {https://ieeexplore.ieee.org/abstract/document/8659002},
doi = {10.1109/WACV.2019.00025},
issn = {1550-5790},
year = {2019},
date = {2019-01-01},
urldate = {2019-01-01},
booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
pages = {179-189},
keywords = {activity recognition, computer vision, machine learning, WACV},
pubstate = {published},
tppubtype = {inproceedings}
}
S. Hickson, N. Dufour, A. Sud, V. Kwatra, I. Essa
Eyemotion: Classifying Facial Expressions in VR Using Eye-Tracking Cameras Proceedings Article
In: IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1626-1635, 2019, ISSN: 1550-5790.
Abstract | Links | BibTeX | Tags: audio-video fusion, face & gesture, face processing, multimodal interfaces, WACV
@inproceedings{2019-Hickson-ECFEUEC,
title = {Eyemotion: Classifying Facial Expressions in VR Using Eye-Tracking Cameras},
author = {S. Hickson and N. Dufour and A. Sud and V. Kwatra and I. Essa},
url = {https://ieeexplore.ieee.org/document/8658392
https://ai.google/research/pubs/pub46291},
doi = {10.1109/WACV.2019.00178},
issn = {1550-5790},
year = {2019},
date = {2019-01-01},
urldate = {2019-01-01},
booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
pages = {1626-1635},
abstract = {One of the main challenges of social interaction in virtual reality settings is that head-mounted displays occlude a large portion of the face, blocking facial expressions and thereby restricting social engagement cues among users. We present an algorithm to automatically infer expressions by analyzing only a partially occluded face while the user is engaged in a virtual reality experience. Specifically, we show that images of the user's eyes captured from an IR gaze-tracking camera within a VR headset are sufficient to infer a subset of facial expressions without the use of any fixed external camera. Using these inferences, we can generate dynamic avatars in real-time which function as an expressive surrogate for the user. We propose a novel data collection pipeline as well as a novel approach for increasing CNN accuracy via personalization. Our results show a mean accuracy of 74% (F1 of 0.73) among 5 'emotive' expressions and a mean accuracy of 70% (F1 of 0.68) among 10 distinct facial action units, outperforming human raters.
},
keywords = {audio-video fusion, face & gesture, face processing, multimodal interfaces, WACV},
pubstate = {published},
tppubtype = {inproceedings}
}
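The personalization step described in the Eyemotion abstract above can be pictured with a short sketch. The backbone, layer sizes, and calibration procedure below are illustrative assumptions, not the authors' published architecture; the idea is only that a shared CNN is frozen and the final classification layer is adapted to a single user's eye images.

```python
# Hypothetical sketch (not the authors' code): personalize an eye-image
# expression classifier by fine-tuning only its final layer on a few
# labeled calibration frames from one user.
import torch
import torch.nn as nn

NUM_EXPRESSIONS = 5  # the abstract reports 5 'emotive' expressions

class EyeExpressionNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(  # stand-in backbone, single-channel IR input
            nn.Conv2d(1, 16, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        )
        self.head = nn.Linear(32, NUM_EXPRESSIONS)

    def forward(self, x):
        return self.head(self.features(x))

def personalize(model, calib_images, calib_labels, steps=100):
    """Freeze the shared backbone; adapt only the head to one user."""
    for p in model.features.parameters():
        p.requires_grad = False
    opt = torch.optim.Adam(model.head.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    for _ in range(steps):
        opt.zero_grad()
        loss_fn(model(calib_images), calib_labels).backward()
        opt.step()
    return model

# e.g.: personalize(EyeExpressionNet(), torch.randn(8, 1, 64, 64),
#                   torch.randint(0, NUM_EXPRESSIONS, (8,)))
```

Head-only fine-tuning on a handful of per-user frames is one common, cheap way to realize the kind of personalization gain the abstract reports.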
Irfan Essa, Vivek Kwatra, Matthias Grundmann
Vector representation for video segmentation Patent
2018, (US Patent Application 14/587,420).
Links | BibTeX | Tags: computer vision, google, patents
@patent{2018-Essa-VRVS,
title = {Vector representation for video segmentation},
author = {Irfan Essa and Vivek Kwatra and Matthias Grundmann},
url = {https://patents.google.com/patent/US20180350131},
year = {2018},
date = {2018-12-06},
urldate = {2018-12-01},
publisher = {(US Patent Application # 14/587,420)},
howpublished = {US Patent # US20180350131A1},
note = {US Patent Application 14/587,420},
keywords = {computer vision, google, patents},
pubstate = {published},
tppubtype = {patent}
}
Caroline Pantofaru, Vinay Bettadapura, Krishna Bharat, Irfan Essa
Systems and methods for directing content generation using a first-person point-of-view device Patent
2018, (US Patent 10,110,850).
Abstract | Links | BibTeX | Tags: computer vision, google, patents
@patent{2018-Pantofaru-SMDCGUFPD,
title = {Systems and methods for directing content generation using a first-person point-of-view device},
author = {Caroline Pantofaru and Vinay Bettadapura and Krishna Bharat and Irfan Essa},
url = {https://patents.google.com/patent/US10110850},
year = {2018},
date = {2018-10-23},
urldate = {2018-10-01},
publisher = {(US Patent #10110850)},
abstract = {A method for localizing the attention of a user of a first-person point-of-view (FPPOV) device is disclosed. The method includes receiving data from an FPPOV device, the data being indicative of a first region-of-interest (ROI) of an event for a first time duration and a second ROI of the event for a second time duration. The method further includes determining that a first camera from a plurality of cameras best captures the first ROI during the first time duration, and determining that a second camera from the plurality of cameras best captures the second ROI during the second time duration.
},
howpublished = {US Patent # US10110850B1},
note = {US Patent 10,110,850},
keywords = {computer vision, google, patents},
pubstate = {published},
tppubtype = {patent}
}
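The claim quoted above is essentially a per-time-window argmax over cameras. A minimal sketch, assuming axis-aligned boxes in a shared reference frame and an intersection-over-ROI coverage score; both are illustrative choices, not language from the patent:

```python
# Hypothetical sketch of the selection logic: for each time window,
# pick the camera whose view best covers the region of interest
# signaled by the first-person device.
def overlap(roi, view):
    """Fraction of `roi` covered by `view`; boxes are (x1, y1, x2, y2)."""
    ix = max(0, min(roi[2], view[2]) - max(roi[0], view[0]))
    iy = max(0, min(roi[3], view[3]) - max(roi[1], view[1]))
    roi_area = (roi[2] - roi[0]) * (roi[3] - roi[1])
    return (ix * iy) / roi_area if roi_area else 0.0

def best_camera_per_window(roi_by_window, camera_views):
    """roi_by_window: one ROI box per time window.
    camera_views: dict mapping camera id -> view box."""
    return [max(camera_views, key=lambda c: overlap(roi, camera_views[c]))
            for roi in roi_by_window]

# views = {"cam_A": (0, 0, 50, 50), "cam_B": (40, 0, 100, 50)}
# best_camera_per_window([(10, 10, 20, 20), (60, 10, 80, 30)], views)
# -> ["cam_A", "cam_B"], mirroring the two-window example in the abstract
```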
Jonathan C Balloch, Varun Agrawal, Irfan Essa, Sonia Chernova
Unbiasing Semantic Segmentation For Robot Perception using Synthetic Data Feature Transfer Technical Report
no. arXiv:1809.03676, 2018.
Abstract | Links | BibTeX | Tags: arXiv, robotics, scene understanding
@techreport{2018-Balloch-USSRPUSDFT,
title = {Unbiasing Semantic Segmentation For Robot Perception using Synthetic Data Feature Transfer},
author = {Jonathan C Balloch and Varun Agrawal and Irfan Essa and Sonia Chernova},
url = {https://doi.org/10.48550/arXiv.1809.03676},
doi = {10.48550/arXiv.1809.03676},
year = {2018},
date = {2018-09-01},
urldate = {2018-09-01},
journal = {arXiv},
number = {arXiv:1809.03676},
abstract = {Robot perception systems need to perform reliable image segmentation in real-time on noisy, raw perception data. State-of-the-art segmentation approaches use large CNN models and carefully constructed datasets; however, these models focus on accuracy at the cost of real-time inference. Furthermore, the standard semantic segmentation datasets are not large enough for training CNNs without augmentation and are not representative of noisy, uncurated robot perception data. We propose improving the performance of real-time segmentation frameworks on robot perception data by transferring features learned from synthetic segmentation data. We show that pretraining real-time segmentation architectures with synthetic segmentation data instead of ImageNet improves fine-tuning performance by reducing the bias learned in pretraining and closing the transfer gap as a result. Our experiments show that our real-time robot perception models pretrained on synthetic data outperform those pretrained on ImageNet for every scale of fine-tuning data examined. Moreover, the degree to which synthetic pretraining outperforms ImageNet pretraining increases as the availability of robot data decreases, making our approach attractive for robotics domains where dataset collection is hard and/or expensive.
},
howpublished = {arXiv:1809.03676},
keywords = {arXiv, robotics, scene understanding},
pubstate = {published},
tppubtype = {techreport}
}
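The recipe in the abstract above, pretrain on synthetic segmentation data instead of ImageNet and then fine-tune on real robot data, is just two passes of ordinary supervised training over different datasets. A minimal PyTorch sketch with toy stand-ins for the model and loaders; none of this is the paper's architecture or data:

```python
# Hypothetical sketch: stage 1 pretrains on synthetic data, stage 2
# fine-tunes the same weights on a small real robot-perception set.
import torch
import torch.nn as nn

def train(model, loader, epochs, lr):
    """One plain supervised loop, reused for both stages below."""
    opt = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    loss_fn = nn.CrossEntropyLoss()
    model.train()
    for _ in range(epochs):
        for images, masks in loader:
            opt.zero_grad()
            loss_fn(model(images), masks).backward()
            opt.step()

# Toy stand-ins so the sketch runs end to end: a per-pixel classifier
# and random (image, mask) batches for 3 classes.
seg_net = nn.Conv2d(3, 3, kernel_size=1)
fake_batch = lambda: (torch.randn(2, 3, 32, 32),
                      torch.randint(0, 3, (2, 32, 32)))
synthetic_loader = [fake_batch() for _ in range(8)]
real_robot_loader = [fake_batch() for _ in range(2)]

train(seg_net, synthetic_loader, epochs=2, lr=1e-2)   # stage 1: synthetic pretraining
train(seg_net, real_robot_loader, epochs=2, lr=1e-3)  # stage 2: fine-tune on real data
```

The paper's claim is about initialization: starting stage 2 from synthetic-data features rather than ImageNet features reduces the bias carried into fine-tuning.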
Unaiza Ahsan, Rishi Madhok, Irfan Essa
Video Jigsaw: Unsupervised Learning of Spatiotemporal Context for Video Action Recognition Journal Article
In: arXiv, no. arXiv:1808.07507, 2018.
BibTeX | Tags: activity recognition, computer vision, machine learning
@article{2018-Ahsan-VJULSCVAR,
title = {Video Jigsaw: Unsupervised Learning of Spatiotemporal Context for Video Action Recognition},
author = {Unaiza Ahsan and Rishi Madhok and Irfan Essa},
year = {2018},
date = {2018-08-01},
journal = {arXiv},
number = {arXiv:1808.07507},
keywords = {activity recognition, computer vision, machine learning},
pubstate = {published},
tppubtype = {article}
}
Luke Drnach, Irfan Essa, Lena Ting
Identifying Gait Phases from Joint Kinematics during Walking with Switched Linear Dynamical Systems Proceedings Article
In: IEEE International Conference on Biomedical Robotics and Biomechatronics (Biorob), pp. 1181-1186, 2018, ISSN: 2155-1782.
Abstract | Links | BibTeX | Tags: gait analysis, robotics
@inproceedings{2018-Drnach-IGPFJKDWWSLDS,
title = {Identifying Gait Phases from Joint Kinematics during Walking with Switched Linear Dynamical Systems},
author = {Luke Drnach and Irfan Essa and Lena Ting},
url = {https://ieeexplore.ieee.org/document/8487216},
doi = {10.1109/BIOROB.2018.8487216},
issn = {2155-1782},
year = {2018},
date = {2018-08-01},
urldate = {2018-08-01},
booktitle = {IEEE International Conference on Biomedical Robotics and Biomechatronics (Biorob)},
pages = {1181-1186},
abstract = {Human-robot interaction (HRI) for gait rehabilitation would benefit from data-driven gait models that account for gait phases and gait dynamics. Here we address the current limitation in gait models driven by kinematic data, which do not model interlimb gait dynamics and have not been shown to precisely identify gait events. We used Switched Linear Dynamical Systems (SLDS) to model joint angle kinematic data from healthy individuals walking on a treadmill with normal gaits and with gaits perturbed by electrical stimulation. We compared the model-inferred gait phases to gait phases measured externally via a force plate. We found that SLDS models accounted for over 88% of the variation in each joint angle and labeled the joint kinematics with the correct gait phase with 84% precision on average. The transitions between hidden states matched measured gait events, with a median absolute difference of 25ms. To our knowledge, this is the first time that SLDS inferred gait phases have been validated by an external measure of gait, instead of against predefined gait phase durations. SLDS provide individual-specific representations of gait that incorporate both gait phases and gait dynamics. SLDS may be useful for developing control policies for HRI aimed at improving gait by allowing for changes in control to be precisely timed to different gait phases.
},
keywords = {gait analysis, robotics},
pubstate = {published},
tppubtype = {inproceedings}
}
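The SLDS idea in the abstract above can be made concrete with a toy example: one linear map per gait phase, x[t+1] = A[z[t]] x[t] + noise, with the phase label recovered by asking which mode's dynamics best explain each observed step. The matrices, switching schedule, and hard one-step-error assignment below are illustrative simplifications; the paper learns the modes from data and infers hidden states probabilistically rather than assuming them known.

```python
# A minimal numeric sketch of a switched linear dynamical system
# (assumed forms, not the paper's code).
import numpy as np

rng = np.random.default_rng(0)
A = [np.array([[0.99, 0.05], [-0.05, 0.99]]),   # mode 0 ("stance")
     np.array([[0.95, 0.20], [-0.20, 0.95]])]   # mode 1 ("swing")

# Simulate: switch mode every 30 steps, a crude stand-in for gait phases.
T, x = 120, np.array([1.0, 0.0])
true_z, X = [], [x]
for t in range(T):
    z = (t // 30) % 2
    x = A[z] @ x + 0.005 * rng.standard_normal(2)
    true_z.append(z)
    X.append(x)

# Infer: label each step with the mode whose one-step prediction fits best.
X = np.array(X)
err = np.stack([np.linalg.norm(X[1:] - X[:-1] @ a.T, axis=1) for a in A])
z_hat = err.argmin(axis=0)
print("per-step agreement with true phases:", (z_hat == np.array(true_z)).mean())
```

The paper's version of this validates the inferred phase transitions against force-plate gait events rather than against a known switching schedule.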
Steven Hickson, Anelia Angelova, Irfan Essa, Rahul Sukthankar
Object category learning and retrieval with weak supervision Technical Report
no. arXiv:1801.08985, 2018.
Abstract | Links | BibTeX | Tags: arXiv, computer vision, machine learning, object detection
@techreport{2018-Hickson-OCLRWWS,
title = {Object category learning and retrieval with weak supervision},
author = {Steven Hickson and Anelia Angelova and Irfan Essa and Rahul Sukthankar},
url = {https://arxiv.org/abs/1801.08985
https://arxiv.org/pdf/1801.08985},
doi = {10.48550/arXiv.1801.08985},
year = {2018},
date = {2018-07-01},
urldate = {2018-07-01},
journal = {arXiv},
number = {arXiv:1801.08985},
abstract = {We consider the problem of retrieving objects from image data and learning to classify them into meaningful semantic categories with minimal supervision. To that end, we propose a fully differentiable unsupervised deep clustering approach to learn semantic classes in an end-to-end fashion without individual class labeling using only unlabeled object proposals. The key contributions of our work are 1) a kmeans clustering objective where the clusters are learned as parameters of the network and are represented as memory units, and 2) simultaneously building a feature representation, or embedding, while learning to cluster it. This approach shows promising results on two popular computer vision datasets: on CIFAR10 for clustering objects, and on the more complex and challenging Cityscapes dataset for semantically discovering classes which visually correspond to cars, people, and bicycles. Currently, the only supervision provided is segmentation objectness masks, but this method can be extended to use an unsupervised objectness-based object generation mechanism which will make the approach completely unsupervised.
},
howpublished = {arXiv:1801.08985},
keywords = {arXiv, computer vision, machine learning, object detection},
pubstate = {published},
tppubtype = {techreport}
}
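Contribution (1) in the abstract above, k-means-style centroids stored as parameters of the network ("memory units") and trained jointly with the embedding, can be sketched in a few lines. The soft assignment and expected-distance loss here are illustrative assumptions, not the paper's exact objective:

```python
# Hypothetical sketch of differentiable clustering: centroids are
# nn.Parameter tensors updated by the same gradient descent that
# trains the embedding.
import torch
import torch.nn as nn

class DeepCluster(nn.Module):
    def __init__(self, in_dim=128, embed_dim=32, k=10):
        super().__init__()
        self.embed = nn.Sequential(nn.Linear(in_dim, 64), nn.ReLU(),
                                   nn.Linear(64, embed_dim))
        self.centroids = nn.Parameter(torch.randn(k, embed_dim))  # learned clusters

    def forward(self, x):
        z = self.embed(x)                     # (B, embed_dim) embedding
        d = torch.cdist(z, self.centroids)    # (B, k) distances to centroids
        assign = torch.softmax(-d, dim=1)     # soft cluster assignment
        return (assign * d).sum(dim=1).mean() # expected within-cluster distance

model = DeepCluster()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
for _ in range(100):  # random stand-ins for unlabeled object proposals
    loss = model(torch.randn(64, 128))
    opt.zero_grad(); loss.backward(); opt.step()
```

A bare objective like this can collapse all points onto one centroid; the paper's memory-unit formulation and objectness-mask supervision are what make the real system stable, so treat this only as a schematic of the differentiable-clustering idea.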
Chiori Hori, Huda Alamri, Jue Wang, Gordon Wichern, Takaaki Hori, Anoop Cherian, Tim K Marks, Vincent Cartillier, Raphael Gontijo Lopes, Abhishek Das, Irfan Essa, Dhruv Batra, Devi Parikh
End-to-End Audio Visual Scene-Aware Dialog using Multimodal Attention-Based Video Features Journal Article
In: arXiv, no. arXiv:1806.08409, 2018.
BibTeX | Tags:
@article{2018-Hori-EAVSDUMAVF,
title = {End-to-End Audio Visual Scene-Aware Dialog using Multimodal Attention-Based Video Features},
author = {Chiori Hori and Huda Alamri and Jue Wang and Gordon Wichern and Takaaki Hori and Anoop Cherian and Tim K Marks and Vincent Cartillier and Raphael Gontijo Lopes and Abhishek Das and Irfan Essa and Dhruv Batra and Devi Parikh},
year = {2018},
date = {2018-06-01},
journal = {arXiv},
number = {arXiv:1806.08409},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Huda Alamri, Vincent Cartillier, Raphael Gontijo Lopes, Abhishek Das, Jue Wang, Irfan Essa, Dhruv Batra, Devi Parikh, Anoop Cherian, Tim K Marks, Chiori Hori
Audio Visual Scene-Aware Dialog (AVSD) Challenge at DSTC7 Technical Report
no. arXiv:1806.00525, 2018.
Abstract | Links | BibTeX | Tags: arXiv, embodied agents, multimedia, vision & language
@techreport{2018-Alamri-AVSDACD,
title = {Audio Visual Scene-Aware Dialog (AVSD) Challenge at DSTC7},
author = {Huda Alamri and Vincent Cartillier and Raphael Gontijo Lopes and Abhishek Das and Jue Wang and Irfan Essa and Dhruv Batra and Devi Parikh and Anoop Cherian and Tim K Marks and Chiori Hori},
url = {https://video-dialog.com/
https://arxiv.org/abs/1806.00525},
doi = {10.48550/arXiv.1806.00525},
year = {2018},
date = {2018-06-01},
urldate = {2018-06-01},
journal = {arXiv},
number = {arXiv:1806.00525},
abstract = {Scene-aware dialog systems will be able to have conversations with users about the objects and events around them. Progress on such systems can be made by integrating state-of-the-art technologies from multiple research areas, including end-to-end dialog systems, visual dialog, and video description. We introduce the Audio Visual Scene-Aware Dialog (AVSD) challenge and dataset. In this challenge, which is one track of the 7th Dialog System Technology Challenges (DSTC7) workshop, the task is to build a system that generates responses in a dialog about an input video.
},
howpublished = {arXiv:1806.00525},
keywords = {arXiv, embodied agents, multimedia, vision & language},
pubstate = {published},
tppubtype = {techreport}
}
Aneeq Zia, Andrew Hung, Irfan Essa, Anthony Jarc
Surgical Activity Recognition in Robot-Assisted Radical Prostatectomy using Deep Learning Journal Article
In: arXiv, no. arXiv:1806.00466, 2018.
BibTeX | Tags:
@article{2018-Zia-SARRRPUDL,
title = {Surgical Activity Recognition in Robot-Assisted Radical Prostatectomy using Deep Learning},
author = {Aneeq Zia and Andrew Hung and Irfan Essa and Anthony Jarc},
year = {2018},
date = {2018-06-01},
journal = {arXiv},
number = {arXiv:1806.00466},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Aneeq Zia, Irfan Essa
Automated surgical skill assessment in RMIS training Journal Article
In: International Journal of Computer Assisted Radiology and Surgery, vol. 13, no. 5, pp. 731–739, 2018.
Links | BibTeX | Tags:
@article{2018-Zia-ASSART,
title = {Automated surgical skill assessment in RMIS training},
author = {Aneeq Zia and Irfan Essa},
url = {https://link.springer.com/article/10.1007/s11548-018-1735-5},
year = {2018},
date = {2018-03-01},
journal = {International Journal of Computer Assisted Radiology and Surgery},
volume = {13},
number = {5},
pages = {731--739},
publisher = {Springer},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Erkam Uzun, Simon Pak Ho Chung, Irfan Essa, Wenke Lee
rtCaptcha: A Real-Time CAPTCHA Based Liveness Detection System Proceedings Article
In: Network and Distributed System Security Symposium (NDSS), 2018.
BibTeX | Tags: information security
@inproceedings{2018-Uzun-RRCBLDS,
title = {rtCaptcha: A Real-Time CAPTCHA Based Liveness Detection System},
author = {Erkam Uzun and Simon Pak Ho Chung and Irfan Essa and Wenke Lee},
year = {2018},
date = {2018-03-01},
booktitle = {Network and Distributed System Security Symposium (NDSS)},
keywords = {information security},
pubstate = {published},
tppubtype = {inproceedings}
}
Matthias Grundmann, Vivek Kwatra, Irfan Essa
Cascaded camera motion estimation, rolling shutter detection, and camera shake detection for video stabilization Patent
2018, (US Patent 9,888,180).
Links | BibTeX | Tags: computer vision, google, patents
@patent{2018-Grundmann-CCMERSDCSDVS,
title = {Cascaded camera motion estimation, rolling shutter detection, and camera shake detection for video stabilization},
author = {Matthias Grundmann and Vivek Kwatra and Irfan Essa},
url = {https://patents.google.com/patent/US9888180},
year = {2018},
date = {2018-02-06},
urldate = {2018-02-01},
publisher = {(US Patent #9888180)},
howpublished = {US Patent # US9888180},
note = {US Patent 9,888,180},
keywords = {computer vision, google, patents},
pubstate = {published},
tppubtype = {patent}
}
Aneeq Zia, Yachna Sharma, Vinay Bettadapura, Eric L Sarin, Irfan Essa
Video and accelerometer-based motion analysis for automated surgical skills assessment Journal Article
In: International Journal of Computer Assisted Radiology and Surgery, vol. 13, no. 3, pp. 443–455, 2018.
Links | BibTeX | Tags: activity assessment, activity recognition, IJCARS, surgical training
@article{2018-Zia-VAMAASSA,
title = {Video and accelerometer-based motion analysis for automated surgical skills assessment},
author = {Aneeq Zia and Yachna Sharma and Vinay Bettadapura and Eric L Sarin and Irfan Essa},
url = {https://link.springer.com/article/10.1007/s11548-018-1704-z},
doi = {10.1007/s11548-018-1704-z},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
journal = {International Journal of Computer Assisted Radiology and Surgery},
volume = {13},
number = {3},
pages = {443--455},
publisher = {Springer},
keywords = {activity assessment, activity recognition, IJCARS, surgical training},
pubstate = {published},
tppubtype = {article}
}
Daniel Castro, Steven Hickson, Patsorn Sangkloy, Bhavishya Mittal, Sean Dai, James Hays, Irfan Essa
Let's Dance: Learning From Online Dance Videos Journal Article
In: arXiv, no. arXiv:1801.07388, 2018.
BibTeX | Tags:
@article{2018-Castro-LDLFODV,
title = {Let's Dance: Learning From Online Dance Videos},
author = {Daniel Castro and Steven Hickson and Patsorn Sangkloy and Bhavishya Mittal and Sean Dai and James Hays and Irfan Essa},
year = {2018},
date = {2018-01-01},
journal = {arXiv},
number = {arXiv:1801.07388},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Unaiza Ahsan, Chen Sun, Irfan Essa
DiscrimNet: Semi-Supervised Action Recognition from Videos using Generative Adversarial Networks Journal Article
In: arXiv, no. arXiv:1801.07230, 2018.
BibTeX | Tags: activity recognition, computer vision, machine learning
@article{2018-Ahsan-DSARFVUGAN,
title = {DiscrimNet: Semi-Supervised Action Recognition from Videos using Generative Adversarial Networks},
author = {Unaiza Ahsan and Chen Sun and Irfan Essa},
year = {2018},
date = {2018-01-01},
journal = {arXiv},
number = {arXiv:1801.07230},
keywords = {activity recognition, computer vision, machine learning},
pubstate = {published},
tppubtype = {article}
}
Amirreza Shaban, Shray Bansal, Zhen Liu, Irfan Essa, Byron Boots
One-Shot Learning for Semantic Segmentation Proceedings Article
In: British Machine Vision Conference (BMVC), 2017.
Links | BibTeX | Tags: image segmentation, one-shot learning, semantic segmentation
@inproceedings{2017-Shaban-OLSS,
title = {One-Shot Learning for Semantic Segmentation},
author = {Amirreza Shaban and Shray Bansal and Zhen Liu and Irfan Essa and Byron Boots},
url = {http://www.bmva.org/bmvc/2017/papers/paper167/index.html},
doi = {10.5244/C.31.167},
year = {2017},
date = {2017-09-01},
booktitle = {British Machine Vision Conference (BMVC)},
keywords = {image segmentation, one-shot learning, semantic segmentation},
pubstate = {published},
tppubtype = {inproceedings}
}
Aneeq Zia, Yachna Sharma, Vinay Bettadapura, Eric Sarin, Irfan Essa
Video and Accelerometer-Based Motion Analysis for Automated Surgical Skills Assessment Proceedings Article
In: Information Processing in Computer-Assisted Interventions (IPCAI), 2017.
BibTeX | Tags: activity assessment, activity recognition, surgical training
@inproceedings{2017-Zia-VAMAASSA,
title = {Video and Accelerometer-Based Motion Analysis for Automated Surgical Skills Assessment},
author = {Aneeq Zia and Yachna Sharma and Vinay Bettadapura and Eric Sarin and Irfan Essa},
year = {2017},
date = {2017-06-01},
urldate = {2017-06-01},
booktitle = {Information Processing in Computer-Assisted Interventions (IPCAI)},
keywords = {activity assessment, activity recognition, surgical training},
pubstate = {published},
tppubtype = {inproceedings}
}
Other Publication Sites
A few more sites that aggregate research publications: Academia.edu, BibSonomy, CiteULike, Mendeley.
Copyright/About
[Please see the Copyright Statement that may apply to the content listed here.]
This list of publications is produced by using the teachPress plugin for WordPress.