OCR分为文字检测和文字识别两个部分。
计算机视觉,OCR相关的顶会、重要论文。
2020-12-11 整理,共283篇论文。
2020年48篇
2019年45篇
2018年55篇
2017年35篇
2016年19篇
文章列表
@article{Chen2004,
author = {Chen, Datong and Odobez, Jean-Marc and Bourlard, Herv{\'{e}}},
doi = {10.1016/j.patcog.2003.06.001},
journal = {Pattern Recognition},
keywords = {mrf,svm,text localization,text recognition,text segmentation,video ocr},
number = {3},
pages = {595--608},
title = {{Text detection and recognition in images and video frames}},
volume = {37},
year = {2004}
}
% NOTE(review): key kept as Low2004 so existing citations resolve; the author is David G. Lowe.
@article{Low2004,
author = {Lowe, David G.},
journal = {International Journal of Computer Vision},
number = {2},
pages = {91--110},
title = {{Distinctive image features from scale-invariant keypoints}},
url = {https://www.cs.ubc.ca/{~}lowe/papers/ijcv04.pdf},
volume = {60},
year = {2004}
}
@article{Hintze2004,
author = {Hintze, John M. and Christ, Theodore J.},
doi = {10.1080/02796015.2004.12086243},
issn = {02796015},
journal = {School Psychology Review},
number = {2},
pages = {204--217},
title = {{An examination of variability as a function of passage variance in CBM progress monitoring}},
volume = {33},
year = {2004}
}
@article{Ye2005,
author = {Ye, Qixiang and Huang, Qingming and Gao, Wen and Zhao, Debin},
doi = {10.1016/j.imavis.2005.01.004},
journal = {Image and Vision Computing},
keywords = {feature combination,multiscale wavelet feature,svm classification,text detection},
pages = {565--576},
title = {{Fast and robust text detection in images and video frames}},
volume = {23},
year = {2005}
}
@article{Sangeetha2006,
author = {Sangeetha, V. and Prasad, K. J. Rajendra},
doi = {10.1002/chin.200650130},
issn = {03764699},
journal = {Indian Journal of Chemistry - Section B Organic and Medicinal Chemistry},
keywords = {1-hydroxycarbazole-2-carbaldehydes,2-acetylfuro carbazoles,Benzo carbazoles,Phenyl oxopyranocarbazoles,o-aminothiophenol},
number = {8},
pages = {1951--1954},
title = {{Syntheses of novel derivatives of 2-acetylfuro[2,3-a]carbazoles, benzo[1,2-b]-1,4-thiazepino[2,3-a]carbazoles and 1-acetyloxycarbazole-2-carbaldehydes}},
volume = {45},
year = {2006}
}
% NOTE(review): key kept as Jan2007 so existing citations resolve. The original first
% author "Jan, C V" and year 2007 were artefacts of the arXiv margin stamp; the arXiv
% identifier 1904.09405 dates the preprint to April 2019.
@article{Jan2007,
archivePrefix = {arXiv},
arxivId = {1904.09405},
author = {Wang, Qingqing and Huang, Ye and Jia, Wenjing and He, Xiangjian and Blumenstein, Michael and Lyu, Shujing and Lu, Yue},
eprint = {1904.09405},
pages = {1--14},
title = {{FACLSTM: ConvLSTM with Focused Attention for Scene Text Recognition}},
year = {2019}
}
@inproceedings{Hess2007,
author = {Hess, Robin and Fern, Alan},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2007.382989},
isbn = {1424411807},
issn = {10636919},
title = {{Improved video registration using non-distinctive local image features}},
year = {2007}
}
% NOTE(review): the original record was extracted from a preprint PDF: the affiliation
% "Applied Mathematics" had been parsed as an author, the title was truncated, and the
% keywords were abstract fragments. Venue and year should be verified against the
% published version (presumably the IEEE TPAMI article) before citing.
@article{Frinken2009,
author = {Frinken, Volkmar and Fischer, Andreas and Manmatha, R. and Bunke, Horst},
pages = {1--14},
title = {{A Novel Word Spotting Method Based on Recurrent Neural Networks}},
year = {2009}
}
@inproceedings{Enslen2009,
author = {Enslen, Eric and Hill, Emily and Pollock, Lori and Vijay-Shanker, K.},
booktitle = {Proceedings of the 2009 6th IEEE International Working Conference on Mining Software Repositories, MSR 2009},
doi = {10.1109/MSR.2009.5069482},
isbn = {9781424434930},
pages = {71--80},
title = {{Mining source code to automatically split identifiers for software analysis}},
year = {2009}
}
@inproceedings{Wang2011,
author = {Wang, Kai and Babenko, Boris and Belongie, Serge},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
doi = {10.1109/ICCV.2011.6126402},
isbn = {9781457711015},
pages = {1457--1464},
title = {{End-to-end scene text recognition}},
year = {2011}
}
@inproceedings{Coates2011,
author = {Coates, Adam and Carpenter, Blake and Case, Carl and Satheesh, Sanjeev and Suresh, Bipin and Wang, Tao and Wu, David J. and Ng, Andrew Y.},
booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
doi = {10.1109/ICDAR.2011.95},
isbn = {9780769545202},
issn = {15205363},
keywords = {Robust reading,character recognition,feature learning,photo OCR},
pages = {440--445},
title = {{Text detection and character recognition in scene images with unsupervised feature learning}},
year = {2011}
}
@article{Elagouni2012,
author = {Elagouni, Khaoula and Garcia, Christophe and Mamalet, Franck and S{\'{e}}billot, Pascale},
doi = {10.1007/978-3-642-33266-1_22},
isbn = {9783642332654},
issn = {03029743},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {CTC,ConvNet,LSTM,Video text recognition,multi-scale image scanning},
number = {PART 2},
pages = {172--179},
title = {{Text recognition in videos using a recurrent connectionist approach}},
volume = {7553},
year = {2012}
}
@inproceedings{Sermanet2012,
archivePrefix = {arXiv},
arxivId = {1204.3968},
author = {Sermanet, Pierre and Chintala, Soumith and LeCun, Yann},
booktitle = {Proceedings - International Conference on Pattern Recognition},
eprint = {1204.3968},
isbn = {9784990644109},
issn = {10514651},
pages = {3288--3291},
title = {{Convolutional neural networks applied to house numbers digit classification}},
year = {2012}
}
% NOTE(review): key kept as Zhang2013 so existing citations resolve; the arXiv
% identifier 1711.04249 dates the preprint to November 2017.
@article{Zhang2013,
archivePrefix = {arXiv},
arxivId = {1711.04249},
author = {Zhang, Sheng and Liu, Yuliang and Jin, Lianwen and Luo, Canjie},
eprint = {1711.04249},
title = {{Feature Enhancement Network: A Refined Scene Text Detector}},
year = {2017}
}
% NOTE(review): the title previously contained a repository cover-page line
% ("To cite this version ..."). No venue is recorded; presumably the BMVC paper -- verify.
@article{Mishra2013,
author = {Mishra, Anand and Alahari, Karteek and Jawahar, C. V.},
pages = {1--11},
title = {{Scene Text Recognition using Higher Order Language Priors}},
year = {2013}
}
@inproceedings{Bissacco2013,
author = {Bissacco, Alessandro and Cummins, Mark and Netzer, Yuval and Neven, Hartmut},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
doi = {10.1109/ICCV.2013.102},
isbn = {9781479928392},
keywords = {OCR,deep learning,scene text,text recognition},
pages = {785--792},
title = {{PhotoOCR: Reading text in uncontrolled conditions}},
year = {2013}
}
@article{Gonzalez2013,
author = {Gonz{\'{a}}lez, {\'{A}}lvaro and Bergasa, Luis Miguel},
doi = {10.1016/j.imavis.2013.01.003},
issn = {02628856},
journal = {Image and Vision Computing},
keywords = {Character recognition,Character segmentation,Natural images,Scene text detection,Text detection,Text recognition},
number = {3},
pages = {255--274},
title = {{A text reading algorithm for natural images}},
volume = {31},
year = {2013}
}
@article{Bakhti2013,
author = {Bakhti, Mostafa and Snaidero, Nicolas and Schneider, David and Aggarwal, Shweta and M{\"{o}}bius, Wiebke and Janshoff, Andreas and Eckhardt, Matthias and Nave, Klaus Armin and Simons, Mikael},
doi = {10.1073/pnas.1220104110},
issn = {00278424},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
keywords = {Adhesiveness,Myelination,PLP},
number = {8},
pages = {3143--3148},
pmid = {23382229},
title = {{Loss of electrostatic cell-surface repulsion mediates myelin membrane adhesion and compaction in the central nervous system}},
volume = {110},
year = {2013}
}
@inproceedings{Graves2014,
author = {Graves, Alex and Jaitly, Navdeep},
booktitle = {31st International Conference on Machine Learning, ICML 2014},
isbn = {9781634393973},
pages = {3771--3779},
title = {{Towards end-to-end speech recognition with recurrent neural networks}},
volume = {5},
year = {2014}
}
@article{Bluche2014,
author = {Bluche, Th{\'{e}}odore and Ney, Hermann and Kermorvant, Christopher},
doi = {10.1007/978-3-319-11397-5_15},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Deep neural networks,Handwriting recognition,Recurrent neural networks},
pages = {199--219},
title = {{A comparison of sequence-trained deep neural networks and recurrent neural networks optical modeling for handwriting recognition}},
volume = {8791},
year = {2014}
}
% NOTE(review): no venue is recorded and the year looks doubtful (presumably the
% CVPR 2017 paper) -- verify before citing.
@article{Liu2015,
author = {Liu, Yuliang and Jin, Lianwen},
pages = {1962--1969},
title = {{Deep Matching Prior Network: Toward Tighter Multi-oriented Text Detection}},
year = {2015}
}
% NOTE(review): no venue is recorded, so the issue number below has nothing to attach
% to; the year also looks doubtful (presumably a CVPR 2017 paper) -- verify before citing.
@article{He2015,
author = {He, Dafang and Yang, Xiao and Liang, Chen and Zhou, Zihan and Ororbia, Alex G. and Kifer, Daniel and Giles, C. Lee},
number = {1},
title = {{Multi-scale FCN with Cascaded Instance Aware Segmentation for Arbitrary Oriented Word Spotting In The Wild}},
year = {2015}
}
@article{Sahu2015,
archivePrefix = {arXiv},
arxivId = {1511.04176},
author = {Sahu, Devendra Kumar and Sukhwani, Mohak},
eprint = {1511.04176},
pages = {1--9},
title = {{Sequence to Sequence Learning for Optical Character Recognition}},
url = {http://arxiv.org/abs/1511.04176},
year = {2015}
}
@inproceedings{Wang2015,
archivePrefix = {arXiv},
arxivId = {1507.03196},
author = {Wang, Zhangyang and Yang, Jianchao and Jin, Hailin and Shechtman, Eli and Agarwala, Aseem and Brandt, Jonathan and Huang, Thomas S.},
booktitle = {MM 2015 - Proceedings of the 2015 ACM Multimedia Conference},
doi = {10.1145/2733373.2806219},
eprint = {1507.03196},
isbn = {9781450334594},
keywords = {Deep Learning,Domain Adaptation,Model Compression,Visual Font Recognition},
pages = {451--459},
title = {{DeepFont: Identify your font from an image}},
year = {2015}
}
@article{Visin2015,
archivePrefix = {arXiv},
arxivId = {1505.00393},
author = {Visin, Francesco and Kastner, Kyle and Cho, Kyunghyun and Matteucci, Matteo and Courville, Aaron and Bengio, Yoshua},
eprint = {1505.00393},
pages = {1--9},
title = {{ReNet: A Recurrent Neural Network Based Alternative to Convolutional Networks}},
url = {http://arxiv.org/abs/1505.00393},
year = {2015}
}
% NOTE(review): key kept as Yousef2015 so existing citations resolve; the arXiv
% identifier 1812.11894 dates the preprint to December 2018. The former volume and
% number fields had no journal to belong to and were dropped.
@article{Yousef2015,
archivePrefix = {arXiv},
arxivId = {1812.11894},
author = {Yousef, Mohamed and Hussain, Khaled F. and Mohammed, Usama S.},
eprint = {1812.11894},
pages = {1--13},
title = {{Accurate, Data-Efficient, Unconstrained Text Recognition with Convolutional Neural Networks}},
year = {2018}
}
@inproceedings{Jaderberg2015,
archivePrefix = {arXiv},
arxivId = {1412.5903},
author = {Jaderberg, Max and Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew},
booktitle = {3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings},
eprint = {1412.5903},
pages = {1--10},
title = {{Deep structured output learning for unconstrained text recognition}},
year = {2015}
}
@article{Su2015,
author = {Su, Bolan and Lu, Shijian},
doi = {10.1007/978-3-319-16865-4_3},
isbn = {9783319168647},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
pages = {35--48},
title = {{Accurate scene text recognition based on recurrent neural network}},
volume = {9003},
year = {2015}
}
% NOTE(review): this citation key contains a non-ASCII character, which classic BibTeX
% toolchains may not handle. It is kept unchanged so existing citations resolve.
@article{Sønderby2015,
archivePrefix = {arXiv},
arxivId = {1509.05329},
author = {S{\o}nderby, S{\o}ren Kaae and S{\o}nderby, Casper Kaae and Maal{\o}e, Lars and Winther, Ole},
eprint = {1509.05329},
title = {{Recurrent Spatial Transformer Networks}},
url = {http://arxiv.org/abs/1509.05329},
year = {2015}
}
@inproceedings{Jaderberg2015a,
archivePrefix = {arXiv},
arxivId = {1506.02025},
author = {Jaderberg, Max and Simonyan, Karen and Zisserman, Andrew and Kavukcuoglu, Koray},
booktitle = {Advances in Neural Information Processing Systems},
eprint = {1506.02025},
issn = {10495258},
pages = {2017--2025},
title = {{Spatial transformer networks}},
volume = {28},
year = {2015}
}
@inproceedings{Zhang2016,
archivePrefix = {arXiv},
arxivId = {1604.04018},
author = {Zhang, Zheng and Zhang, Chengquan and Shen, Wei and Yao, Cong and Liu, Wenyu and Bai, Xiang},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2016.451},
eprint = {1604.04018},
isbn = {9781467388504},
issn = {10636919},
pages = {4159--4167},
title = {{Multi-oriented Text Detection with Fully Convolutional Networks}},
year = {2016}
}
@article{Yao2016,
archivePrefix = {arXiv},
arxivId = {1606.09002},
author = {Yao, Cong and Bai, Xiang and Sang, Nong and Zhou, Xinyu and Zhou, Shuchang and Cao, Zhimin},
eprint = {1606.09002},
keywords = {fully convolutional network,holistic prediction,natural images,scene text detection},
pages = {1--10},
title = {{Scene Text Detection via Holistic, Multi-Channel Prediction}},
url = {http://arxiv.org/abs/1606.09002},
year = {2016}
}
% NOTE(review): same work as the entry keyed He2016b; both keys are kept so existing
% citations resolve.
@inproceedings{He2016a,
archivePrefix = {arXiv},
arxivId = {1506.04395},
author = {He, Pan and Huang, Weilin and Qiao, Yu and Loy, Chen Change and Tang, Xiaoou},
booktitle = {30th AAAI Conference on Artificial Intelligence, AAAI 2016},
eprint = {1506.04395},
isbn = {9781577357605},
pages = {3501--3508},
title = {{Reading scene text in deep convolutional sequences}},
year = {2016}
}
@article{Yang2016,
archivePrefix = {arXiv},
arxivId = {1611.07385},
author = {Yang, Xiao and He, Dafang and Huang, Wenyi and Zhou, Zihan and Ororbia, Alex and Kifer, Dan and Giles, C. Lee},
eprint = {1611.07385},
title = {{Smart Library: Identifying Books in a Library using Richly Supervised Deep Scene Text Reading}},
url = {http://arxiv.org/abs/1611.07385},
year = {2016}
}
@article{Gomez2016,
archivePrefix = {arXiv},
arxivId = {1407.7504},
author = {Gomez, Lluis and Karatzas, Dimosthenis},
doi = {10.1007/s10032-016-0274-2},
eprint = {1407.7504},
issn = {14332825},
journal = {International Journal on Document Analysis and Recognition},
keywords = {Detection,Hierarchical grouping,Perceptual organization,Scene text,Segmentation},
number = {4},
pages = {335--349},
title = {{A fast hierarchical method for multi-script and arbitrary oriented scene text extraction}},
volume = {19},
year = {2016}
}
% NOTE(review): same work as the entry keyed Shi2016; both keys are kept so existing
% citations resolve.
@inproceedings{Shi2016a,
archivePrefix = {arXiv},
arxivId = {1603.03915},
author = {Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2016.452},
eprint = {1603.03915},
isbn = {9781467388504},
issn = {10636919},
pages = {4168--4176},
title = {{Robust Scene Text Recognition with Automatic Rectification}},
year = {2016}
}
% NOTE(review): same work as the entry keyed Lee2016; both keys are kept so existing
% citations resolve.
@inproceedings{Lee2016a,
archivePrefix = {arXiv},
arxivId = {1603.03101},
author = {Lee, Chen Yu and Osindero, Simon},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2016.245},
eprint = {1603.03101},
isbn = {9781467388504},
issn = {10636919},
pages = {2231--2239},
title = {{Recursive Recurrent Nets with Attention Modeling for OCR in the Wild}},
year = {2016}
}
% NOTE(review): same work as the entry keyed He2016a; both keys are kept so existing
% citations resolve.
@inproceedings{He2016b,
archivePrefix = {arXiv},
arxivId = {1506.04395},
author = {He, Pan and Huang, Weilin and Qiao, Yu and Loy, Chen Change and Tang, Xiaoou},
booktitle = {30th AAAI Conference on Artificial Intelligence, AAAI 2016},
eprint = {1506.04395},
isbn = {9781577357605},
pages = {3501--3508},
title = {{Reading scene text in deep convolutional sequences}},
year = {2016}
}
@article{He2016,
archivePrefix = {arXiv},
arxivId = {1510.03283},
author = {He, Tong and Huang, Weilin and Qiao, Yu and Yao, Jian},
doi = {10.1109/TIP.2016.2547588},
eprint = {1510.03283},
issn = {10577149},
journal = {IEEE Transactions on Image Processing},
keywords = {Maximally stable extremal regions,convolutional neural networks,multi-level supervised information,multi-task learning,text detector},
number = {6},
pages = {2529--2541},
pmid = {27093723},
title = {{Text-Attentional Convolutional Neural Network for Scene Text Detection}},
volume = {25},
year = {2016}
}
% NOTE(review): same work as the entry keyed Shi2016a; both keys are kept so existing
% citations resolve.
@inproceedings{Shi2016,
archivePrefix = {arXiv},
arxivId = {1603.03915},
author = {Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2016.452},
eprint = {1603.03915},
isbn = {9781467388504},
issn = {10636919},
pages = {4168--4176},
title = {{Robust Scene Text Recognition with Automatic Rectification}},
year = {2016}
}
@inproceedings{Liu2016,
author = {Liu, Wei and Chen, Chaofeng and Wong, Kwan Yee K. and Su, Zhizhong and Han, Junyu},
booktitle = {British Machine Vision Conference 2016, BMVC 2016},
doi = {10.5244/C.30.43},
pages = {43.1--43.13},
title = {{STAR-Net: A spatial attention residue network for scene text recognition}},
year = {2016}
}
@article{Jaderberg2016,
archivePrefix = {arXiv},
arxivId = {1412.1842},
author = {Jaderberg, Max and Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew},
doi = {10.1007/s11263-015-0823-z},
eprint = {1412.1842},
issn = {15731405},
journal = {International Journal of Computer Vision},
keywords = {Convolutional neural networks,Deep learning,Synthetic data,Text detection,Text recognition,Text retrieval,Text spotting},
number = {1},
pages = {1--20},
title = {{Reading Text in the Wild with Convolutional Neural Networks}},
volume = {116},
year = {2016}
}
% NOTE(review): same work as the entry keyed Lee2016a; both keys are kept so existing
% citations resolve.
@inproceedings{Lee2016,
archivePrefix = {arXiv},
arxivId = {1603.03101},
author = {Lee, Chen Yu and Osindero, Simon},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2016.245},
eprint = {1603.03101},
isbn = {9781467388504},
issn = {10636919},
pages = {2231--2239},
title = {{Recursive Recurrent Nets with Attention Modeling for OCR in the Wild}},
year = {2016}
}
@article{Veit2016,
archivePrefix = {arXiv},
arxivId = {1601.07140},
author = {Veit, Andreas and Matera, Tomas and Neumann, Lukas and Matas, Jiri and Belongie, Serge},
eprint = {1601.07140},
title = {{COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images}},
url = {http://arxiv.org/abs/1601.07140},
year = {2016}
}
@inproceedings{Dong2017,
author = {Dong, Meng and He, Dongliang and Luo, Chong and Liu, Dong and Zeng, Wenjun},
booktitle = {British Machine Vision Conference 2017, BMVC 2017},
doi = {10.5244/c.31.175},
isbn = {190172560X},
pages = {1--12},
title = {{A CNN-based approach for automatic license plate recognition in the wild}},
year = {2017}
}
% NOTE(review): same work as the entry keyed Zhu2017a; both keys are kept so existing
% citations resolve. The venue moved from booktitle (ignored for misc entries) to
% howpublished; the former issue number had no journal to attach to.
@misc{Zhu2017,
author = {Zhu, Xiangyu and Jiang, Yingying and Yang, Shuli and Wang, Xiaobing and Li, Wei and Fu, Pei and Wang, Hua and Luo, Zhenbo},
howpublished = {arXiv},
keywords = {CTPN,Deep Residual Networks,Scene text detection},
title = {{Deep residual text detection network for scene text}},
year = {2017}
}
% NOTE(review): same work as the entry keyed Zhu2017; both keys are kept so existing
% citations resolve. The page range looks inconsistent with an arXiv preprint -- verify.
@misc{Zhu2017a,
author = {Zhu, Xiangyu and Jiang, Yingying and Yang, Shuli and Wang, Xiaobing and Li, Wei and Fu, Pei and Wang, Hua and Luo, Zhenbo},
howpublished = {arXiv},
keywords = {CTPN,Deep Residual Networks,Scene text detection},
pages = {7269--7278},
title = {{Deep residual text detection network for scene text}},
year = {2017}
}
@misc{Sain2017,
author = {Sain, Aneeshan and Bhunia, Ayan Kumar and Roy, Partha Pratim and Pal, Umapada},
howpublished = {arXiv},
keywords = {Fourier-Laplacian,Hidden Markov Model,Scene text and Video text retrieval,Skeletonization,Text extraction},
title = {{Multi-oriented text detection and verification in video frames and scene images}},
year = {2017}
}
@article{Zhong2017,
author = {Zhong, Zhuoyao and Sun, Lei and Huo, Qiang},
title = {{An Anchor-Free Region Proposal Network for Faster R-CNN based Text Detection Approaches}},
year = {2017}
}
@article{Bai2017,
author = {Bai, Xiang and Shi, Baoguang and Zhang, Chengquan and Cai, Xuan and Qi, Li},
doi = {10.1016/j.patcog.2016.12.005},
journal = {Pattern Recognition},
pages = {437--446},
title = {{Text/non-text image classification in the wild with convolutional neural networks}},
volume = {66},
year = {2017}
}
@inproceedings{He2017,
archivePrefix = {arXiv},
arxivId = {1709.00138},
author = {He, Pan and Huang, Weilin and He, Tong and Zhu, Qile and Qiao, Yu and Li, Xiaolin},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
doi = {10.1109/ICCV.2017.331},
eprint = {1709.00138},
isbn = {9781538610329},
issn = {15505499},
pages = {3066--3074},
title = {{Single Shot Text Detector with Regional Attention}},
year = {2017}
}
@inproceedings{Liao2017,
archivePrefix = {arXiv},
arxivId = {1611.06779},
author = {Liao, Minghui and Shi, Baoguang and Bai, Xiang and Wang, Xinggang and Liu, Wenyu},
booktitle = {31st AAAI Conference on Artificial Intelligence, AAAI 2017},
eprint = {1611.06779},
pages = {4161--4167},
title = {{TextBoxes: A fast text detector with a single deep neural network}},
year = {2017}
}
@article{Zhan2017,
archivePrefix = {arXiv},
arxivId = {1710.03112},
author = {Zhan, Hongjian and Wang, Qingqing and Lu, Yue},
doi = {10.1007/978-3-319-70136-3_62},
eprint = {1710.03112},
isbn = {9783319701356},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Connectionist temporal classification,Convolutional neural network,Digit string recognition,End to end,Recurrent neural network},
pages = {583--591},
title = {{Handwritten digit string recognition by combination of residual network and RNN-CTC}},
volume = {10639},
year = {2017}
}
@inproceedings{Wojna2017,
archivePrefix = {arXiv},
arxivId = {1704.03549},
author = {Wojna, Zbigniew and Gorban, Alexander N. and Lee, Dar Shyang and Murphy, Kevin and Yu, Qian and Li, Yeqing and Ibarz, Julian},
booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
doi = {10.1109/ICDAR.2017.143},
eprint = {1704.03549},
isbn = {9781538635865},
issn = {15205363},
pages = {844--850},
title = {{Attention-Based Extraction of Structured Information from Street View Imagery}},
volume = {1},
year = {2017}
}
@inproceedings{Bluche2017,
archivePrefix = {arXiv},
arxivId = {1604.03286},
author = {Bluche, Th{\'{e}}odore and Louradour, J{\'{e}}r{\^{o}}me and Messina, Ronaldo},
booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
doi = {10.1109/ICDAR.2017.174},
eprint = {1604.03286},
isbn = {9781538635865},
issn = {15205363},
pages = {1050--1055},
title = {{Scan, Attend and Read: End-To-End Handwritten Paragraph Recognition with MDLSTM Attention}},
volume = {1},
year = {2017}
}
% NOTE(review): the author list was scrambled by PDF extraction (given and family names
% interleaved across people) and the title carried an affiliation line; both are
% reconstructed here and should be confirmed against the published paper.
@inproceedings{He2017a,
author = {He, Wenhao and Zhang, Xu-Yao and Yin, Fei and Liu, Cheng-Lin},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
pages = {745--753},
title = {{Deep Direct Regression for Multi-Oriented Scene Text Detection}},
year = {2017}
}
@article{Jiang2017,
archivePrefix = {arXiv},
arxivId = {1706.09579},
author = {Jiang, Yingying and Zhu, Xiangyu and Wang, Xiaobing and Yang, Shuli and Li, Wei and Wang, Hua and Fu, Pei and Luo, Zhenbo},
eprint = {1706.09579},
journal = {arXiv},
pages = {1--8},
title = {{R2CNN: Rotational region CNN for orientation robust scene text detection}},
year = {2017}
}
@article{Wang2017,
archivePrefix = {arXiv},
arxivId = {1707.03124},
author = {Wang, Xinlong and Man, Zhipeng and You, Mingyu and Shen, Chunhua},
eprint = {1707.03124},
journal = {arXiv},
keywords = {Bidirectional recurrent neural network (BRNN),Convolutional neural network (CNN),Depthwise separate convolution,Generative adversarial network (GAN),Long short-term memory (LSTM)},
pages = {1--13},
title = {{Adversarial generation of training examples: Applications to moving vehicle license plate recognition}},
year = {2017}
}
@article{Masood2017,
archivePrefix = {arXiv},
arxivId = {1703.07330},
author = {Masood, Syed Zain and Shu, Guang and Dehghan, Afshin and Ortiz, Enrique G.},
eprint = {1703.07330},
journal = {arXiv},
title = {{License plate detection and recognition using deeply learned convolutional neural networks}},
year = {2017}
}
@article{Gao2017,
archivePrefix = {arXiv},
arxivId = {1709.04303},
author = {Gao, Yunze and Chen, Yingying and Wang, Jinqiao and Lu, Hanqing},
eprint = {1709.04303},
journal = {arXiv},
title = {{Reading scene text with attention convolutional sequence modeling}},
year = {2017}
}
@article{Yin2017,
archivePrefix = {arXiv},
arxivId = {1709.01727},
author = {Yin, Fei and Wu, Yi Chao and Zhang, Xu Yao and Liu, Cheng Lin},
eprint = {1709.01727},
journal = {arXiv},
title = {{Scene text recognition with sliding convolutional character models}},
year = {2017}
}
@inproceedings{Cheng2017,
archivePrefix = {arXiv},
arxivId = {1709.02054},
author = {Cheng, Zhanzhan and Bai, Fan and Xu, Yunlu and Zheng, Gang and Pu, Shiliang and Zhou, Shuigeng},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
doi = {10.1109/ICCV.2017.543},
eprint = {1709.02054},
isbn = {9781538610329},
issn = {15505499},
pages = {5086--5094},
title = {{Focusing Attention: Towards Accurate Text Recognition in Natural Images}},
year = {2017}
}
@article{Bartz2017,
archivePrefix = {arXiv},
arxivId = {1707.08831},
author = {Bartz, Christian and Yang, Haojin and Meinel, Christoph},
eprint = {1707.08831},
journal = {arXiv},
title = {{STN-OCR: A single Neural Network for Text Detection and Text Recognition}},
year = {2017}
}
@article{Shi2017,
archivePrefix = {arXiv},
arxivId = {1507.05717},
author = {Shi, Baoguang and Bai, Xiang and Yao, Cong},
doi = {10.1109/TPAMI.2016.2646371},
eprint = {1507.05717},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Sequence recognition,convolutional neural network,long-short term memory,neural network,optical music recognition,scene text recognition},
number = {11},
pages = {2298--2304},
pmid = {28055850},
title = {{An End-to-End Trainable Neural Network for Image-Based Sequence Recognition and Its Application to Scene Text Recognition}},
volume = {39},
year = {2017}
}
@inproceedings{Lin2017,
archivePrefix = {arXiv},
arxivId = {1612.03897},
author = {Lin, Chen Hsuan and Lucey, Simon},
booktitle = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017},
doi = {10.1109/CVPR.2017.242},
eprint = {1612.03897},
isbn = {9781538604571},
pages = {2252--2260},
title = {{Inverse compositional spatial transformer networks}},
year = {2017}
}
@article{Bae2017,
archivePrefix = {arXiv},
arxivId = {1611.06345},
author = {Bae, Woong and Yoo, Jaejun and Ye, Jong Chul},
eprint = {1611.06345},
journal = {Computer Vision and Pattern Recognition},
pages = {145--153},
title = {{Beyond Deep Residual Learning for Image Restoration}},
year = {2017}
}
@article{Wu2017,
author = {Wu, Songtao and Zhong, Shenghua and Liu, Yan},
doi = {10.1007/s11042-017-4440-4},
issn = {15737721},
journal = {Multimedia Tools and Applications},
keywords = {Convolutional neural networks,Image steganalysis,Residual learning},
month = jan,
pages = {1--17},
title = {{Deep residual learning for image steganalysis}},
year = {2017}
}
@article{Li2017,
author = {Li, Hui and Wang, Peng and Shen, Chunhua},
journal = {arXiv},
number = {2},
pages = {5238--5246},
title = {{Towards End-to-end Text Spotting with Convolutional Recurrent Neural Networks}},
year = {2017}
}
@article{Ren2017,
archivePrefix = {arXiv},
arxivId = {1506.01497},
author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
doi = {10.1109/TPAMI.2016.2577031},
eprint = {1506.01497},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Object detection,convolutional neural network,region proposal},
number = {6},
pages = {1137--1149},
pmid = {27295650},
title = {{Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}},
volume = {39},
year = {2017}
}
@article{Shrivastava2017,
author = {Shrivastava, Abhinav and Wang, Xiaolong and Gupta, Abhinav},
journal = {arXiv},
pages = {2606--2615},
title = {{A-Fast-RCNN: Hard positive generation via adversary for object detection}},
year = {2017}
}
@article{Wang2017a,
author = {Wang, Xinlong and Xiao, Tete and Jiang, Yuning and Shao, Shuai and Sun, Jian and Shen, Chunhua},
journal = {arXiv},
pages = {7774--7783},
title = {{Repulsion loss: Detecting pedestrians in a crowd}},
year = {2017}
}
@inproceedings{Lyu2017,
archivePrefix = {arXiv},
arxivId = {1706.08789},
author = {Lyu, Pengyuan and Bai, Xiang and Yao, Cong and Zhu, Zhen and Huang, Tengteng and Liu, Wenyu},
booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
doi = {10.1109/ICDAR.2017.181},
eprint = {1706.08789},
isbn = {9781538635865},
issn = {15205363},
keywords = {Auto-Encoder,Chinese Calligraphy Synthesis,GAN},
pages = {1095--1100},
title = {{Auto-Encoder Guided GAN for Chinese Calligraphy Synthesis}},
volume = {1},
year = {2017}
}
@inproceedings{Xiao2017,
archivePrefix = {arXiv},
arxivId = {1705.05207},
author = {Xiao, Xuefeng and Yang, Yafeng and Ahmad, Tasweer and Jin, Lianwen and Chang, Tianhai},
booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
doi = {10.1109/ICDAR.2017.150},
eprint = {1705.05207},
isbn = {9781538635865},
issn = {15205363},
keywords = {CNN Compression,Convolutional neural network,Online handwritten Chinese character recognition},
pages = {891--895},
title = {{Design of a Very Compact CNN Classifier for Online Handwritten Chinese Character Recognition Using DropWeight and Global Pooling}},
volume = {1},
year = {2017}
}
@inproceedings{Kang2017,
author = {Kang, Chulmoo and Kim, Gunhee and Yoo, Suk I.},
booktitle = {31st AAAI Conference on Artificial Intelligence, AAAI 2017},
pages = {4103--4110},
title = {{Detection and recognition of text embedded in online images via neural context models}},
year = {2017}
}
@inproceedings{Sui2018,
archivePrefix = {arXiv},
arxivId = {1811.08611},
author = {Sui, Wanchen and Zhang, Qing and Yang, Jun and Chu, Wei},
booktitle = {Proceedings - International Conference on Pattern Recognition},
doi = {10.1109/ICPR.2018.8545047},
eprint = {1811.08611},
isbn = {9781538637883},
issn = {10514651},
pages = {2233--2238},
title = {{A Novel Integrated Framework for Learning both Text Detection and Recognition}},
year = {2018}
}
@article{Wang2018,
archivePrefix = {arXiv},
arxivId = {1812.09809},
author = {Wang, Zi Rui and Du, Jun and Wang, Jia Ming},
eprint = {1812.09809},
journal = {arXiv},
keywords = {Adaptation,Hybrid language model,Offline handwritten Chinese text recognition,Parsimonious HMM,State tying,Writer-aware CNN},
pages = {1--32},
title = {{Writer-Aware CNN for Parsimonious HMM-Based Offline Handwritten Chinese Text Recognition}},
year = {2018}
}
@inproceedings{Xue2018,
archivePrefix = {arXiv},
arxivId = {1807.03547},
author = {Xue, Chuhui and Lu, Shijian and Zhan, Fangneng},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-030-01270-0_22},
eprint = {1807.03547},
isbn = {9783030012694},
issn = {16113349},
keywords = {Data augmentation,Deep network models,Scene text detection,Semantics-aware detection},
pages = {370--387},
title = {{Accurate Scene Text Detection Through Border Semantics Awareness and Bootstrapping}},
volume = {11220},
year = {2018}
}
@misc{Yuan2018,
archivePrefix = {arXiv},
arxivId = {1803.00085},
author = {Yuan, Tai Ling and Zhu, Zhe and Xu, Kun and Li, Cheng Jun and Hu, Shi Min},
eprint = {1803.00085},
howpublished = {arXiv},
title = {{Chinese text in the wild}},
year = {2018}
}
@inproceedings{Prasad2018,
author = {Prasad, Shitala and Kong, Adams Wai Kin},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-030-01270-0_33},
isbn = {9783030012694},
issn = {16113349},
keywords = {Deep learning,Natural Scenes,Object detection,RCNN,Text detection},
pages = {559--576},
title = {{Using Object Information for Spotting Text}},
volume = {11220 LNCS},
year = {2018}
@article{Liu2018,
archivePrefix = {arXiv},
arxivId = {1906.02371v3},
author = {Liu, Yuliang and Zhang, Sheng and Jin, Lianwen and Xie, Lele and Wu, Yaqiang and Wang, Zhepeng},
eprint = {1906.02371v3},
title = {{Omnidirectional Scene Text Detection with Sequential-free Box Discretization}},
year = {2018}
@article{Chowdhury2018,
archivePrefix = {arXiv},
arxivId = {1807.07965v2},
author = {Chowdhury, Arindam and Vig, Lovekesh},
eprint = {1807.07965v2},
pages = {1--11},
title = {{An Efficient End-to-End Neural Model for Handwritten Text Recognition}},
year = {2018}
@article{Yuan2018a,
archivePrefix = {arXiv},
arxivId = {1807.01884},
author = {Yuan, Qi and Zhang, Bingwang and Li, Haojie and Wang, Zhihui and Luo, Zhongxuan},
eprint = {1807.01884},
journal = {arXiv},
keywords = {Box-Based Detector,End-to-end,Scale-Adaptive Anchors},
pages = {0--7},
title = {{A single shot text detector with scale-adaptive anchors}},
year = {2018}
@article{Liao2018,
archivePrefix = {arXiv},
arxivId = {1803.05265},
author = {Liao, Minghui and Zhu, Zhen and Shi, Baoguang and Xia, Gui Song and Bai, Xiang},
doi = {10.1109/CVPR.2018.00619},
eprint = {1803.05265},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5909--5918},
title = {{Rotation-Sensitive Regression for Oriented Scene Text Detection}},
year = {2018}
@article{Dai2018,
archivePrefix = {arXiv},
arxivId = {1709.03272},
author = {Dai, Yuchen and Huang, Zheng and Gao, Yuting and Xu, Youxuan and Chen, Kai and Guo, Jie and Qiu, Weidong},
doi = {10.1109/ICPR.2018.8546066},
eprint = {1709.03272},
isbn = {9781538637883},
issn = {10514651},
journal = {Proceedings - International Conference on Pattern Recognition},
pages = {3604--3609},
title = {{Fused Text Segmentation Networks for Multi-oriented Scene Text Detection}},
volume = {2018-August},
year = {2018}
@article{Li2018,
author = {Li, Xiang and Wang, Wenhai and Hou, Wenbo and Liu, Ruo Ze and Lu, Tong and Yang, Jian},
journal = {arXiv},
pages = {9336--9345},
title = {{Shape robust text detection with progressive scale expansion network}},
year = {2018}
@article{Yang2018,
author = {Yang, Hailin and Jin, Lianwen and Huang, Weiguo},
doi = {10.1109/ACCESS.2018.2840218},
isbn = {2016070102},
journal = {IEEE Access},
pages = {30174--30183},
publisher = {IEEE},
title = {{Dense and Tight Detection of Chinese Characters in Historical Documents : Datasets and a Recognition Guided Detector}},
volume = {6},
year = {2018}
@article{Deng2018,
archivePrefix = {arXiv},
arxivId = {1801.01315},
author = {Deng, Dan and Liu, Haifeng and Li, Xuelong and Cai, Deng},
eprint = {1801.01315},
isbn = {9781577358008},
journal = {32nd AAAI Conference on Artificial Intelligence, AAAI 2018},
pages = {6773--6780},
title = {{PixelLink: Detecting scene text via instance segmentation}},
year = {2018}
@article{Zhang2018,
archivePrefix = {arXiv},
arxivId = {1712.00433},
author = {Zhang, Zhishuai and Qiao, Siyuan and Xie, Cihang and Shen, Wei and Wang, Bo and Yuille, Alan L.},
doi = {10.1109/CVPR.2018.00609},
eprint = {1712.00433},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5813--5821},
title = {{Single-Shot Object Detection with Enriched Semantics}},
year = {2018}
@article{Cai2018,
archivePrefix = {arXiv},
arxivId = {1712.00726},
author = {Cai, Zhaowei and Vasconcelos, Nuno},
doi = {10.1109/CVPR.2018.00644},
eprint = {1712.00726},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {6154--6162},
title = {{Cascade R-CNN: Delving into High Quality Object Detection}},
year = {2018}
@article{Hu2018,
archivePrefix = {arXiv},
arxivId = {1711.11575},
author = {Hu, Han and Gu, Jiayuan and Zhang, Zheng and Dai, Jifeng and Wei, Yichen},
doi = {10.1109/CVPR.2018.00378},
eprint = {1711.11575},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {3588--3597},
title = {{Relation Networks for Object Detection}},
year = {2018}
@article{Singh2018,
archivePrefix = {arXiv},
arxivId = {1712.01802},
author = {Singh, Bharat and Li, Hengduo and Sharma, Abhishek and Davis, Larry S.},
doi = {10.1109/CVPR.2018.00119},
eprint = {1712.01802},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {1081--1090},
title = {{R-FCN-3000 at 30fps: Decoupling Detection and Classification}},
year = {2018}
@article{Wang2018a,
archivePrefix = {arXiv},
arxivId = {1912.10205v1},
author = {Wang, Tianwei and Zhu, Yuanzhi and Jin, Lianwen and Luo, Canjie and Chen, Xiaoxue and Wu, Yaqiang and Wang, Qianying and Cai, Mingxiang},
eprint = {1912.10205v1},
title = {{Decoupled Attention Network for Text Recognition}},
year = {2018}
@article{Lyu2018,
archivePrefix = {arXiv},
arxivId = {1802.08948},
author = {Lyu, Pengyuan and Yao, Cong and Wu, Wenhao and Yan, Shuicheng and Bai, Xiang},
doi = {10.1109/CVPR.2018.00788},
eprint = {1802.08948},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {7553--7563},
title = {{Multi-oriented Scene Text Detection via Corner Localization and Region Segmentation}},
year = {2018}
@article{Yang2018a,
archivePrefix = {arXiv},
arxivId = {1805.01167},
author = {Yang, Qiangpeng and Cheng, Mengli and Zhou, Wenmeng and Chen, Yan and Qiu, Minghui and Lin, Wei},
doi = {10.24963/ijcai.2018/149},
eprint = {1805.01167},
isbn = {9780999241127},
issn = {10450823},
journal = {IJCAI International Joint Conference on Artificial Intelligence},
pages = {1071--1077},
title = {{Inceptext: A new inception-text module with deformable PSROI pooling for multi-oriented scene text detection}},
volume = {2018-July},
year = {2018}
@article{Liao2018a,
archivePrefix = {arXiv},
arxivId = {1801.02765},
author = {Liao, Minghui and Shi, Baoguang and Bai, Xiang},
doi = {10.1109/TIP.2018.2825107},
eprint = {1801.02765},
issn = {10577149},
journal = {IEEE Transactions on Image Processing},
keywords = {Scene text detection,convolutional neural networks,multi-oriented text,scene text recognition,word spotting},
number = {8},
pages = {3676--3690},
pmid = {29993831},
title = {{TextBoxes++: A Single-Shot Oriented Scene Text Detector}},
volume = {27},
year = {2018}
@article{Ma2018,
archivePrefix = {arXiv},
arxivId = {1703.01086},
author = {Ma, Jianqi and Shao, Weiyuan and Ye, Hao and Wang, Li and Wang, Hong and Zheng, Yingbin and Xue, Xiangyang},
doi = {10.1109/TMM.2018.2818020},
eprint = {1703.01086},
issn = {15209210},
journal = {IEEE Transactions on Multimedia},
keywords = {Scene text detection,arbitrary oriented,rotation proposals},
number = {11},
pages = {3111--3122},
title = {{Arbitrary-oriented scene text detection via rotation proposals}},
volume = {20},
year = {2018}
@article{Cheng2018,
archivePrefix = {arXiv},
arxivId = {1711.04226},
author = {Cheng, Zhanzhan and Xu, Yangliu and Bai, Fan and Niu, Yi and Pu, Shiliang and Zhou, Shuigeng},
doi = {10.1109/CVPR.2018.00584},
eprint = {1711.04226},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5571--5579},
title = {{AON: Towards Arbitrarily-Oriented Text Recognition}},
year = {2018}
@article{Singh2018a,
archivePrefix = {arXiv},
arxivId = {1711.08189},
author = {Singh, Bharat and Davis, Larry S.},
doi = {10.1109/CVPR.2018.00377},
eprint = {1711.08189},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {3578--3587},
title = {{An Analysis of Scale Invariance in Object Detection - SNIP}},
year = {2018}
@article{Xu2018,
author = {Xu, Zhenbo and Yang, Wei and Meng, Ajin and Lu, Nanxue and Huang, Huan and Ying, Changchun and Huang, Liusheng},
doi = {10.1007/978-3-030-01261-8_16},
isbn = {9783030012601},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Convolutional neural network,Object detection,Object recognition,Object segmentation},
pages = {261--277},
title = {{Towards end-to-end license plate detection and recognition: A large dataset and baseline}},
volume = {11217 LNCS},
year = {2018}
@article{He2018,
archivePrefix = {arXiv},
arxivId = {1803.03474},
author = {He, Tong and Tian, Zhi and Huang, Weilin and Shen, Chunhua and Qiao, Yu and Sun, Changming},
doi = {10.1109/CVPR.2018.00527},
eprint = {1803.03474},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5020--5029},
title = {{An End-to-End TextSpotter with Explicit Alignment and Attention}},
year = {2018}
@article{Synchrosqueezed2018,
author = {Synchrosqueezed, Hierarchical Threshold and Transform, Wavelet and Wang, Wen-bo and Jing, Yun-yu},
title = {{An Improved Text Localization Method for Natural Scene Images}},
year = {2018}
@article{Liu2018a,
archivePrefix = {arXiv},
arxivId = {1801.01671},
author = {Liu, Xuebo and Liang, Ding and Yan, Shi and Chen, Dagui and Qiao, Yu and Yan, Junjie},
doi = {10.1109/CVPR.2018.00595},
eprint = {1801.01671},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5676--5685},
title = {{FOTS: Fast Oriented Text Spotting with a Unified Network}},
year = {2018}
@article{Bartz2018,
archivePrefix = {arXiv},
arxivId = {1712.05404},
author = {Bartz, Christian and Yang, Haojin and Meinel, Christoph},
eprint = {1712.05404},
isbn = {9781577358008},
journal = {32nd AAAI Conference on Artificial Intelligence, AAAI 2018},
number = {1},
pages = {6674--6681},
title = {{See: Towards semi-supervised end-to-end scene text recognition}},
year = {2018}
@article{Liu2018b,
author = {Liu, Zichuan and Li, Yixing and Ren, Fengbo and Goh, Wang Ling and Yu, Hao},
isbn = {9781577358008},
journal = {32nd AAAI Conference on Artificial Intelligence, AAAI 2018},
keywords = {Vision Track},
pages = {7194--7201},
title = {{SqueezedText: A real-time scene text recognition by binary convolutional encoder-decoder network}},
year = {2018}
@article{Reul2018,
archivePrefix = {arXiv},
arxivId = {1802.10038},
author = {Reul, Christian and Springmann, Uwe and Wick, Christoph and Puppe, Frank},
eprint = {1802.10038},
journal = {arXiv},
pages = {1--22},
title = {{Improving OCR accuracy on early printed books by combining pretraining, voting, and active learning}},
year = {2018}
@article{Bai2018,
author = {Bai, Fan and Cheng, Zhanzhan and Niu, Yi and Pu, Shiliang and Zhou, Shuigeng},
journal = {arXiv},
title = {{Edit probability for scene text recognition}},
year = {2018}
@article{Long2018,
archivePrefix = {arXiv},
arxivId = {1811.04256},
author = {Long, Shangbang and He, Xin and Yao, Cong},
eprint = {1811.04256},
journal = {arXiv},
keywords = {Deep learning,Detection,Recognition,Scene text,Survey},
title = {{Scene text detection and recognition: The deep learning era}},
year = {2018}
@article{Liu2018c,
author = {Liu, Fang and Zhou, Zhaoye and Samsonov, Alexey and Blankenbaker, Donna and Larison, Will and Kanarek, Andrew and Lian, Kevin and Kambhampati, Shivkumar and Kijowski, Richard},
doi = {10.1148/radiol.2018172986},
issn = {15271315},
journal = {Radiology},
number = {1},
pages = {160--169},
pmid = {30063195},
title = {{Deep learning approach for evaluating knee MR images: Achieving high diagnostic performance for cartilage lesion detection}},
volume = {289},
year = {2018}
@article{Zhu2018,
archivePrefix = {arXiv},
arxivId = {1811.12786},
author = {Zhu, Yixing and Du, Jun},
eprint = {1811.12786},
journal = {arXiv},
title = {{TextMountain: Accurate scene text detection via instance segmentation}},
year = {2018}
@article{Zhan2018,
author = {Zhan, Fangneng and Lu, Shijian},
journal = {arXiv},
title = {{ESIR: End-to-end Scene Text Recognition via Iterative Image Rectification}},
year = {2018}
@article{Liu2018d,
author = {Liu, Xuebo and Liang, Ding and Yan, Shi and Chen, Dagui and Qiao, Yu and Yan, Junjie},
journal = {arXiv},
pages = {5676--5685},
title = {{FOTS: Fast oriented text spotting with a unified network}},
year = {2018}
@article{Johnson2018,
archivePrefix = {arXiv},
arxivId = {1805.00500},
author = {Johnson, Jeremiah W.},
doi = {10.1007/978-3-030-17798-0},
eprint = {1805.00500},
journal = {arXiv},
pages = {1--7},
title = {{Adapting Mask-RCNN for automatic nucleus segmentation}},
year = {2018}
@article{Shi2018,
author = {Shi, Baoguang and Yang, Mingkun and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
doi = {10.1109/TPAMI.2018.2848939},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Character recognition,Decoding,Detectors,Image Transformation,Proposals,Recurrent neural networks,Scene Text Recognition,Sequence-to-Sequence Learning,Text recognition,Thin-Plate Spline},
number = {c},
pages = {1},
pmid = {29994467},
publisher = {IEEE},
title = {{ASTER: An Attentional Scene Text Recognizer with Flexible Rectification}},
volume = {PP},
year = {2018}
@article{Lyu2018a,
archivePrefix = {arXiv},
arxivId = {1908.08207},
author = {Lyu, Pengyuan and Liao, Minghui and Yao, Cong and Wu, Wenhao and Bai, Xiang},
doi = {10.1007/978-3-030-01264-9_5},
eprint = {1908.08207},
isbn = {9783030012632},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Arbitrary shapes,Neural network,Scene text spotting},
pages = {71--88},
title = {{Mask textspotter: An end-to-end trainable neural network for spotting text with arbitrary shapes}},
volume = {11218 LNCS},
year = {2018}
@article{He2018a,
archivePrefix = {arXiv},
arxivId = {1803.03474},
author = {He, Tong and Tian, Zhi and Huang, Weilin and Shen, Chunhua and Qiao, Yu and Sun, Changming},
doi = {10.1109/CVPR.2018.00527},
eprint = {1803.03474},
isbn = {9781538664209},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {5020--5029},
title = {{An End-to-End TextSpotter with Explicit Alignment and Attention}},
year = {2018}
@article{Chen2018,
author = {Chen, Boyo and Chen, Buo Fu and Lin, Hsuan Tien},
doi = {10.1145/3219819.3219926},
isbn = {9781450355520},
journal = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
keywords = {Atmospheric Science,Blending,Convolutional Neural Network,Dropout,Pooling,Regression,Tropical cyclone,Tropical cyclone intensity},
pages = {90--99},
title = {{Rotation-blended CNNs on a new open dataset for tropical cyclone image-to-intensity regression}},
year = {2018}
@article{Gao2018,
author = {Gao, Ge and Lauri, Mikko and Zhang, Jianwei and Frintrop, Simone},
journal = {arXiv},
keywords = {6D pose estimation,Convolutional neural network,Lie algebra,Point cloud},
title = {{Occlusion resistant object rotation regression from point cloud segments}},
year = {2018}
@article{Sabir2018,
archivePrefix = {arXiv},
arxivId = {1805.09441},
author = {Sabir, Ekraam and Rawls, Stephen and Natarajan, Prem},
doi = {10.1109/ICDAR.2017.361},
eprint = {1805.09441},
isbn = {9781538635865},
issn = {15205363},
journal = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
keywords = {LSTM,Language Model,OCR},
pages = {27--31},
title = {{Implicit Language Model in LSTM for OCR}},
volume = {7},
year = {2018}
@article{Lyu2018b,
archivePrefix = {arXiv},
arxivId = {1908.08207},
author = {Lyu, Pengyuan and Liao, Minghui and Yao, Cong and Wu, Wenhao and Bai, Xiang},
doi = {10.1007/978-3-030-01264-9_5},
eprint = {1908.08207},
isbn = {9783030012632},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Arbitrary shapes,Neural network,Scene text spotting},
pages = {71--88},
title = {{Mask textspotter: An end-to-end trainable neural network for spotting text with arbitrary shapes}},
volume = {11218 LNCS},
year = {2018}
@article{Zhan2018a,
archivePrefix = {arXiv},
arxivId = {1807.03021},
author = {Zhan, Fangneng and Lu, Shijian and Xue, Chuhui},
doi = {10.1007/978-3-030-01237-3_16},
eprint = {1807.03021},
isbn = {9783030012366},
issn = {16113349},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
keywords = {Data augmentation,Image synthesis,Scene text detection,Scene text recognition},
pages = {257--273},
title = {{Verisimilar image synthesis for accurate detection and recognition of texts in scenes}},
volume = {11212 LNCS},
year = {2018}
@inproceedings{Long2018a,
archivePrefix = {arXiv},
arxivId = {1807.01544},
author = {Long, Shangbang and Ruan, Jiaqiang and Zhang, Wenjie and He, Xin and Wu, Wenhao and Yao, Cong},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-030-01216-8_2},
eprint = {1807.01544},
isbn = {9783030012151},
issn = {16113349},
keywords = {Curved text,Deep neural network,Scene text detection},
pages = {19--35},
title = {{TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes}},
volume = {11206 LNCS},
year = {2018}
@article{Gao2018a,
archivePrefix = {arXiv},
arxivId = {1808.00677},
author = {Gao, Yuting and Huang, Zheng and Dai, Yuchen},
eprint = {1808.00677},
journal = {arXiv},
keywords = {Scene text recognition},
title = {{DSAN: Double supervised network with attention mechanism for scene text recognition}},
year = {2018}
@article{Borisyuk2018,
archivePrefix = {arXiv},
arxivId = {1910.05085},
author = {Borisyuk, Fedor and Gordo, Albert and Sivakumar, Viswanath},
doi = {10.1145/3219819.3219861},
eprint = {1910.05085},
isbn = {9781450355520},
journal = {Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
keywords = {Optical character recognition,Text detection,Text recognition},
pages = {71--79},
title = {{Rosetta: Large scale system for text detection and recognition in images}},
year = {2018}
@article{Roy2018,
archivePrefix = {arXiv},
arxivId = {1804.06254},
author = {Roy, Partha Pratim and Mohta, Akash and Chaudhuri, Bidyut B.},
eprint = {1804.06254},
journal = {arXiv},
keywords = {Hidden Markov Models,Indic Text Recognition,Synthetic Data Generation},
pages = {1--35},
title = {{Synthetic data generation for Indic handwritten text recognition}},
year = {2018}
@article{Zhang2019,
archivePrefix = {arXiv},
arxivId = {1904.06535},
author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
doi = {10.1109/CVPR.2019.01080},
eprint = {1904.06535},
isbn = {9781728132938},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
keywords = {Categorization,Document Analysis,Grouping and Shape,Recognition: Detection,Retrieval,Segmentation,Vision Applications and Systems},
pages = {10544--10553},
title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
volume = {2019-June},
year = {2019}
@inproceedings{Liu2019,
archivePrefix = {arXiv},
arxivId = {1903.08836},
author = {Liu, Zichuan and Lin, Guosheng and Yang, Sheng and Liu, Fayao and Lin, Weisi and Goh, Wang Ling},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2019.00744},
eprint = {1903.08836},
isbn = {9781728132938},
issn = {10636919},
keywords = {Categorization,Recognition: Detection,Retrieval},
pages = {7261--7270},
title = {{Towards robust curve text detection with conditional spatial expansion}},
volume = {2019-June},
year = {2019}
@inproceedings{Liu2019a,
archivePrefix = {arXiv},
arxivId = {1903.08836},
author = {Liu, Zichuan and Lin, Guosheng and Yang, Sheng and Liu, Fayao and Lin, Weisi and Goh, Wang Ling},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2019.00744},
eprint = {1903.08836},
isbn = {9781728132938},
issn = {10636919},
keywords = {Categorization,Recognition: Detection,Retrieval},
pages = {7261--7270},
title = {{Towards robust curve text detection with conditional spatial expansion}},
volume = {2019-June},
year = {2019}
@inproceedings{Zhang2019a,
archivePrefix = {arXiv},
arxivId = {1904.06535},
author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2019.01080},
eprint = {1904.06535},
isbn = {9781728132938},
issn = {10636919},
keywords = {Categorization,Document Analysis,Grouping and Shape,Recognition: Detection,Retrieval,Segmentation,Vision Applications and Systems},
pages = {10544--10553},
title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
volume = {2019-June},
year = {2019}
@inproceedings{Zhang2019b,
archivePrefix = {arXiv},
arxivId = {1904.06535},
author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2019.01080},
eprint = {1904.06535},
isbn = {9781728132938},
issn = {10636919},
keywords = {Categorization,Document Analysis,Grouping and Shape,Recognition: Detection,Retrieval,Segmentation,Vision Applications and Systems},
pages = {10544--10553},
title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
volume = {2019-June},
year = {2019}
@inproceedings{Zhang2019c,
archivePrefix = {arXiv},
arxivId = {1904.06535},
author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2019.01080},
eprint = {1904.06535},
isbn = {9781728132938},
issn = {10636919},
keywords = {Categorization,Document Analysis,Grouping and Shape,Recognition: Detection,Retrieval,Segmentation,Vision Applications and Systems},
pages = {10544--10553},
title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
volume = {2019-June},
year = {2019}
@article{Jiao2019,
author = {Jiao, Licheng and Zhang, Fan and Liu, Fang and others},
doi = {10.1109/ACCESS.2019.2939201},
journal = {IEEE Access},
pages = {128837--128868},
publisher = {IEEE},
title = {{A Survey of Deep Learning-Based Object Detection}},
volume = {7},
year = {2019}
@article{Ch2019,
author = {Ch'ng, Chee-Kheng and Chan, Chee Seng and Liu, Cheng-Lin},
doi = {10.1007/s10032-019-00334-z},
issn = {1433-2825},
journal = {International Journal on Document Analysis and Recognition (IJDAR)},
keywords = {Curved text,Scene text detection},
publisher = {Springer Berlin Heidelberg},
title = {{Total-Text : toward orientation robustness in scene text detection}},
url = {https://doi.org/10.1007/s10032-019-00334-z},
year = {2019}
@article{Deng2019,
archivePrefix = {arXiv},
arxivId = {1804.02690v2},
author = {Deng, Linjie and Gong, Yanxiang and Lin, Yi and Shuai, Jingwen and Tu, Xiaoguang and Zhang, Yuefei and Ma, Zheng and Xie, Mei},
eprint = {1804.02690v2},
keywords = {corner-based region proposal network,dual-roi pooling,multi-oriented text detection},
title = {{Detecting Multi-Oriented Text with Corner-based Region Proposals}},
year = {2019}
@article{Wang2019,
author = {Wang, Wenhai and Xie, Enze and Song, Xiaoge and Zang, Yuhang and Wang, Wenjia and Lu, Tong and Yu, Gang and Shen, Chunhua},
journal = {Proceedings of the IEEE International Conference on Computer Vision},
pages = {8440--8449},
title = {{Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network}},
year = {2019}
@article{Zhang2019d,
author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
journal = {arXiv},
title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
volume = {1},
year = {2019}
@inproceedings{Liu2019b,
author = {Liu, Chao and Zou, Yuexian and Yang, Dongming},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-030-05710-7_44},
isbn = {9783030057091},
issn = {16113349},
keywords = {Attention mechanism,Hierarchical feature fusion,Scene text detection (STD),Semantic segmentation},
pages = {531--542},
publisher = {Springer International Publishing},
title = {{Enhancing Scene Text Detection via Fused Semantic Segmentation Network with Attention}},
url = {http://dx.doi.org/10.1007/978-3-030-05710-7{\_}44},
volume = {11295 LNCS},
year = {2019}
@article{Conceicao2019,
author = {Concei{\c{c}}{\~{a}}o, Jhonatas Santos de Jesus and Pinto, Allan and Decker, Luis and Campana, Jose Luis Flores and Neira, Manuel Cordova and {Dos Santos}, Andrezza A. and Pedrini, Helio and Torres, Ricardo},
doi = {10.5753/sibgrapi.est.2019.8333},
pages = {215--218},
title = {{Multi-Lingual Text Localization via Language-Specific Convolutional Neural Networks}},
year = {2019}
@article{Liu2019c,
archivePrefix = {arXiv},
arxivId = {1903.11800},
author = {Liu, Jingchao and Liu, Xuebo and Sheng, Jie and Liang, Ding and Li, Xin and Liu, Qingjie},
eprint = {1903.11800},
journal = {arXiv},
title = {{Pyramid mask text detector}},
year = {2019}
@article{Xing2019,
author = {Xing, Linjie and Tian, Zhi and Huang, Weilin and Scott, Matthew R.},
journal = {arXiv},
pages = {9126--9136},
title = {{Convolutional character networks}},
year = {2019}
@article{Feng2019,
author = {Feng, Wei and He, Wenhao and Yin, Fei and Zhang, Xu Yao and Liu, Cheng Lin},
doi = {10.1109/ICCV.2019.00917},
isbn = {9781728148038},
issn = {15505499},
journal = {Proceedings of the IEEE International Conference on Computer Vision},
pages = {9075--9084},
title = {{Textdragon: An end-to-end framework for arbitrary shaped text spotting}},
volume = {2019-October},
year = {2019}
@article{Wang2019a,
author = {Wang, Xiaobing and Jiang, Yingying and Luo, Zhenbo and Liu, Cheng Lin and Choi, Hyunsoo and Kim, Sungjin},
doi = {10.1109/CVPR.2019.00661},
isbn = {9781728132938},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
keywords = {Categorization,Recognition: Detection,Retrieval,Vision Applications and Systems},
pages = {6442--6451},
title = {{Arbitrary shape scene text detection with adaptive text region representation}},
volume = {2019-June},
year = {2019}
@article{Li2019,
archivePrefix = {arXiv},
arxivId = {1709.08828},
author = {Li, Hui and Wang, Peng and Shen, Chunhua},
doi = {10.1109/TITS.2018.2847291},
eprint = {1709.08828},
issn = {15249050},
journal = {IEEE Transactions on Intelligent Transportation Systems},
keywords = {Car plate detection and recognition,convolutional neural networks,recurrent neural networks},
number = {3},
pages = {1126--1136},
title = {{Toward End-to-End Car License Plate Detection and Recognition with Deep Neural Networks}},
volume = {20},
year = {2019}
@article{Series2019,
author = {Series, Conference},
doi = {10.1088/1742-6596/1314/1/012200},
title = {{Natural Scene Chinese Character Text Detection Method Based on Improved CTPN}},
year = {2019}
@article{Zhan2019,
archivePrefix = {arXiv},
arxivId = {1812.05824},
author = {Zhan, Fangneng and Lu, Shijian},
doi = {10.1109/CVPR.2019.00216},
eprint = {1812.05824},
isbn = {9781728132938},
issn = {10636919},
journal = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
keywords = {Categorization,Deep Learning,Document Analysis,Recognition: Detection,Retrieval},
pages = {2054--2063},
title = {{ESIR: End-to-end scene text recognition via iterative image rectification}},
volume = {2019-June},
year = {2019}
@article{Goel2019,
author = {Goel, Vaibhav and Kumar, Vaibhav and Jaggi, Amandeep Singh and Nagrath, Preeti},
doi = {10.5815/ijitcs.2019.09.06},
number = {9},
pages = {48--54},
title = {{Text Extraction from Natural Scene Images using OpenCV and CNN}},
year = {2019}
@article{Work2019,
author = {Work, Elated},
number = {2015},
pages = {1--15},
title = {{Object Detection Deep Learning}},
year = {2019}
@article{NguyenVan2019,
archivePrefix = {arXiv},
arxivId = {1811.10003},
author = {NguyenVan, Dinh and Lu, Shijian and Tian, Shangxuan and Ouarti, Nizar and Mokhtari, Mounir},
doi = {10.1016/j.patcog.2018.10.012},
eprint = {1811.10003},
issn = {00313203},
journal = {Pattern Recognition},
keywords = {Pooling based grouping,Scene text detection,Scene text proposal,Scene text reading,Scene text spotting},
pages = {118--129},
title = {{A pooling based scene text proposal technique for scene text reading in the wild}},
volume = {87},
year = {2019}
@article{Blanco-medina2019,
author = {Blanco-Medina, Pablo and Fidalgo, Eduardo and Alegre, Enrique and Al-Nabki, Mhd Wesam and Chaves, Deisy},
isbn = {9788497497169},
keywords = {cybersecurity,ocr,text recognition,text spotting,tor darknet},
pages = {828--835},
title = {{ENHANCING TEXT RECOGNITION ON TOR DARKNET}},
year = {2019}
@article{Singh2019,
abstract = {[...] human-in-the-loop, is an interesting problem for surveillance and other
similar applications. Achieving high accuracy while reading license
plates in the real world videos is cumbersome due to complexities like
multiple vehicles, high-density traffic in spatial and temporal domains,
varying camera angles and illumination, occlusions and multiple
resolutions. We present a modular framework for OCR corrections in the
chaotic Indian traffic videos that especially involve complex license
plate patterns. Such patterns are obtained from a state-of-the-art deep
learning model trained on video frames. Since such a model reads the
text from videos (instead of images), we incorporate multi-frame
consensus for generating suggestions in our framework. To ease the
correction process, our human-interactive framework first breaks down
the multi-vehicle videos into multiple clips, each containing a single
vehicle from the video using an object detector and a tracker. Our
framework then provides suggestions for an individual vehicle using
multi-frame consensus. Our framework then selectively presents these
extracted clips to the user to verify/correct the predictions with
minimal human efforts via interactive suggestions. Such high-quality
output can be used to continuously update a large database for
surveillance and can be further used to improve the accuracy of deep
models in the complex real-world scenarios.},
author = {Singh, Pankaj and Patwa, Bhavya and Saluja, Rohit and Ramakrishnan, Ganesh and Chaudhuri, Parag},
doi = {10.1109/icdarw.2019.10036},
pages = {36--40},
title = {{StreetOCRCorrect: An Interactive Framework for OCR Corrections in Chaotic Indian Street Videos}},
year = {2019}
@article{Luo2019,
  archivePrefix = {arXiv},
  arxivId = {1901.03003},
  author = {Luo, Canjie and Jin, Lianwen and Sun, Zenghui},
  doi = {10.1016/j.patcog.2019.01.020},
  eprint = {1901.03003},
  issn = {00313203},
  journal = {Pattern Recognition},
  keywords = {Deep learning,Optical character recognition,Scene text recognition},
  pages = {109--118},
  title = {{MORAN: A Multi-Object Rectified Attention Network for scene text recognition}},
  volume = {90},
  year = {2019},
}
@inproceedings{Wang2019b,
  archivePrefix = {arXiv},
  arxivId = {1903.12473},
  author = {Wang, Wenhai and Xie, Enze and Li, Xiang and Hou, Wenbo and Lu, Tong and Yu, Gang and Shao, Shuai},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR.2019.00956},
  eprint = {1903.12473},
  isbn = {9781728132938},
  issn = {10636919},
  keywords = {Categorization,Deep Learning,Recognition: Detection,Retrieval},
  pages = {9328--9337},
  title = {{Shape robust text detection with progressive scale expansion network}},
  volume = {2019-June},
  year = {2019},
}
@article{Cheng2019,
  archivePrefix = {arXiv},
  arxivId = {1906.09731},
  author = {Cheng, Zhengxue and Sun, Heming and Takeuchi, Masaru and Katto, Jiro},
  eprint = {1906.09731},
  journal = {arXiv},
  title = {{Deep residual learning for image compression}},
  year = {2019},
}
@inproceedings{Vuola2019,
  archivePrefix = {arXiv},
  arxivId = {1901.10170},
  author = {Vuola, Aarno Oskar and Akram, Saad Ullah and Kannala, Juho},
  booktitle = {Proceedings - International Symposium on Biomedical Imaging},
  doi = {10.1109/ISBI.2019.8759574},
  eprint = {1901.10170},
  isbn = {9781538636411},
  issn = {19458452},
  keywords = {Convolutional neural networks,Microscopy image analysis,Nuclei segmentation},
  pages = {208--212},
  title = {{Mask-RCNN and U-Net ensembled for nuclei segmentation}},
  volume = {2019-April},
  year = {2019},
}
@inproceedings{Zhao2019,
  author = {Zhao, Yongqiang and Han, Rui and Rao, Yuan},
  booktitle = {Proceedings - 2019 International Conference on Virtual Reality and Intelligent Systems, ICVRIS 2019},
  doi = {10.1109/ICVRIS.2019.00110},
  isbn = {9781728150505},
  keywords = {Accurate,Fast,New Feature Pyramid},
  pages = {428--431},
  title = {{A new feature pyramid network for object detection}},
  year = {2019},
}
@article{Liao2019,
  archivePrefix = {arXiv},
  arxivId = {1911.08947},
  author = {Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
  doi = {10.1609/aaai.v34i07.6812},
  eprint = {1911.08947},
  issn = {2159-5399},
  journal = {arXiv},
  title = {{Real-time scene text detection with differentiable binarization}},
  year = {2019},
}
@inproceedings{Huang2019,
  archivePrefix = {arXiv},
  arxivId = {1811.09058},
  author = {Huang, Zhida and Zhong, Zhuoyao and Sun, Lei and Huo, Qiang},
  booktitle = {Proceedings - 2019 IEEE Winter Conference on Applications of Computer Vision, WACV 2019},
  doi = {10.1109/WACV.2019.00086},
  eprint = {1811.09058},
  isbn = {9781728119755},
  pages = {764--772},
  title = {{Mask R-CNN with pyramid attention network for scene text detection}},
  year = {2019},
}
@inproceedings{Tian2019,
  author = {Tian, Zhuotao and Shu, Michelle and Lyu, Pengyuan and Li, Ruiyu and Zhou, Chao and Shen, Xiaoyong and Jia, Jiaya},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR.2019.00436},
  isbn = {9781728132938},
  issn = {10636919},
  keywords = {Categorization,Recognition: Detection,Retrieval,Vision Applications and Systems},
  pages = {4229--4238},
  title = {{Learning shape-aware embedding for scene text detection}},
  volume = {2019-June},
  year = {2019},
}
@article{Xu2019,
  archivePrefix = {arXiv},
  arxivId = {1812.01393},
  author = {Xu, Yongchao and Wang, Yukang and Zhou, Wei and Wang, Yongpan and Yang, Zhibo and Bai, Xiang},
  doi = {10.1109/TIP.2019.2900589},
  eprint = {1812.01393},
  issn = {19410042},
  journal = {IEEE Transactions on Image Processing},
  number = {11},
  pages = {5566--5579},
  pmid = {30802859},
  title = {{TextField: Learning a Deep Direction Field for Irregular Scene Text Detection}},
  volume = {28},
  year = {2019},
}
@inproceedings{Yang2019,
  archivePrefix = {arXiv},
  arxivId = {1908.01957},
  author = {Yang, Mingkun and Guan, Yushuo and Liao, Minghui and He, Xin and Bian, Kaigui and Bai, Song and Yao, Cong and Bai, Xiang},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  doi = {10.1109/ICCV.2019.00924},
  eprint = {1908.01957},
  isbn = {9781728148038},
  issn = {15505499},
  pages = {9146--9155},
  title = {{Symmetry-constrained rectification network for scene text recognition}},
  volume = {2019-October},
  year = {2019},
}
@article{Lyu2019,
  archivePrefix = {arXiv},
  arxivId = {1906.05708},
  author = {Lyu, Pengyuan and Yang, Zhicheng and Leng, Xinhang and Wu, Xiaojun and Li, Ruiyu and Shen, Xiaoyong},
  eprint = {1906.05708},
  journal = {arXiv},
  title = {{2D attentional irregular scene text recognizer}},
  year = {2019},
}
@article{Lu2019,
  archivePrefix = {arXiv},
  arxivId = {1910.02562},
  author = {Lu, Ning and Yu, Wenwen and Qi, Xianbiao and Chen, Yihao and Gong, Ping and Xiao, Rong},
  eprint = {1910.02562},
  journal = {arXiv},
  keywords = {Non-local Network,OCR,Scene Text Recognition,Transformer},
  pages = {1--11},
  title = {{MASTER: Multi-aspect non-local network for scene text recognition}},
  year = {2019},
}
@article{Ammirato2019,
  archivePrefix = {arXiv},
  arxivId = {1908.03621},
  author = {Ammirato, Phil and Berg, Alexander C.},
  eprint = {1908.03621},
  journal = {arXiv},
  title = {{A Mask-RCNN baseline for probabilistic object detection}},
  year = {2019},
}
@inproceedings{Wang2019c,
  archivePrefix = {arXiv},
  arxivId = {1908.05498},
  author = {Wang, Pengfei and Huang, Zuming and Liu, Jingtuo and Zhang, Chengquan and En, Mengyi and Ding, Errui and Qi, Fei and Han, Junyu and Shi, Guangming},
  booktitle = {MM 2019 - Proceedings of the 27th ACM International Conference on Multimedia},
  doi = {10.1145/3343031.3350988},
  eprint = {1908.05498},
  isbn = {9781450368896},
  keywords = {Arbitrarily-shaped Text Detection,FCN,Real-time Segmentation},
  pages = {1277--1285},
  title = {{A single-shot arbitrarily-shaped text detector based on context attended multi-task learning}},
  year = {2019},
}
@inproceedings{Sarshogh2019,
  archivePrefix = {arXiv},
  arxivId = {1906.09266},
  author = {Sarshogh, Mohammad Reza and Hines, Keegan},
  booktitle = {Proceedings of the International Conference on Document Analysis and Recognition, ICDAR},
  doi = {10.1109/ICDAR.2019.00085},
  eprint = {1906.09266},
  isbn = {9781728128610},
  issn = {15205363},
  keywords = {Attention,Computer Vision,Deep Convolutional Neural Networks,Mask R-CNN,Multi-task Learning,Optical Character Recognition (OCR),Region-based Convolutional Networks (R-CNN)},
  pages = {494--501},
  title = {{A multi-task network for localization and recognition of text in images}},
  year = {2019},
}
@inproceedings{Bhunia2019,
  archivePrefix = {arXiv},
  arxivId = {1811.01396},
  author = {Bhunia, Ayan Kumar and Das, Abhirup and Bhunia, Ankan Kumar and Kishore, Perla Sai Raj and Roy, Partha Pratim},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR.2019.00490},
  eprint = {1811.01396},
  isbn = {9781728132938},
  issn = {10636919},
  keywords = {Deep Learning,Document Analysis},
  pages = {4762--4771},
  title = {{Handwriting recognition in low-resource scripts using adversarial learning}},
  volume = {2019-June},
  year = {2019},
}
@inproceedings{Liu2019d,
  archivePrefix = {arXiv},
  arxivId = {1903.08836},
  author = {Liu, Zichuan and Lin, Guosheng and Yang, Sheng and Liu, Fayao and Lin, Weisi and Goh, Wang Ling},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR.2019.00744},
  eprint = {1903.08836},
  isbn = {9781728132938},
  issn = {10636919},
  keywords = {Categorization,Recognition: Detection,Retrieval},
  pages = {7261--7270},
  title = {{Towards robust curve text detection with conditional spatial expansion}},
  volume = {2019-June},
  year = {2019},
}
@inproceedings{Zhang2019e,
  archivePrefix = {arXiv},
  arxivId = {1904.06535},
  author = {Zhang, Chengquan and Liang, Borong and Huang, Zuming and En, Mengyi and Han, Junyu and Ding, Errui and Ding, Xinghao},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR.2019.01080},
  eprint = {1904.06535},
  isbn = {9781728132938},
  issn = {10636919},
  keywords = {Categorization,Document Analysis,Grouping and Shape,Recognition: Detection,Retrieval,Segmentation,Vision Applications and Systems},
  pages = {10544--10553},
  title = {{Look more than once: An accurate detector for text of arbitrary shapes}},
  volume = {2019-June},
  year = {2019},
}
@inproceedings{Xiao2020,
  author = {Xiao, Shanyu and Peng, Liangrui and Yan, Ruijie and An, Keyu and Yao, Gang and Min, Jaesik},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58526-6_7},
  isbn = {9783030585259},
  issn = {16113349},
  keywords = {Deep neural network,Scene text detection,Sequential deformation},
  pages = {108--124},
  series = {Lecture Notes in Computer Science},
  title = {{Sequential Deformation for Accurate Scene Text Detection}},
  volume = {12374},
  year = {2020},
}
@inproceedings{Baek2020,
  archivePrefix = {arXiv},
  arxivId = {2007.09629},
  author = {Baek, Youngmin and Shin, Seung and Baek, Jeonghun and Park, Sungrae and Lee, Junyeop and Nam, Daehyun and Lee, Hwalsuk},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58526-6_30},
  eprint = {2007.09629},
  isbn = {9783030585259},
  issn = {16113349},
  keywords = {Character Region Attention,Optical character recognition (OCR),Scene text detection,Scene text recognition,Text spotting},
  pages = {504--521},
  series = {Lecture Notes in Computer Science},
  title = {{Character Region Attention for Text Spotting}},
  volume = {12374},
  year = {2020},
}
@inproceedings{Li2020,
  author = {Li, Liangcheng and Gao, Feiyu and Bu, Jiajun and Wang, Yongpan and Yu, Zhi and Zheng, Qi},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58595-2_6},
  keywords = {graph neural network,ocr text re-organization,pointer},
  pages = {85--100},
  series = {Lecture Notes in Computer Science},
  title = {{An End-to-End OCR Text Re-organization Sequence Learning for Rich-Text Detail Image Comprehension}},
  year = {2020},
}
@article{Zhang2020,
  archivePrefix = {arXiv},
  arxivId = {2003.06567v2},
  author = {Zhang, Hui and Yao, Quanming and Yang, Mingkun and Xu, Yongchao and Bai, Xiang},
  eprint = {2003.06567v2},
  journal = {arXiv},
  keywords = {Automated machine learning,Convolutional neural network,Neural architecture search,Scene text recognition},
  title = {{Efficient backbone search for scene text recognition}},
  year = {2020},
}
@inproceedings{Mou2020,
  author = {Mou, Yongqiang and Tan, Lei and Yang, Hui and Chen, Jingying and Liu, Leyuan and Yan, Rui and Huang, Yaohong},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58555-6_10},
  keywords = {feature learning,neural network,scene text recognition},
  pages = {158--174},
  series = {Lecture Notes in Computer Science},
  title = {{PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit}},
  year = {2020},
}
@article{Baek2020a,
  author = {Baek, Youngmin and Nam, Daehyun and Park, Sungrae and Lee, Junyeop and Shin, Seung and Baek, Jeonghun and Lee, Chae Young and Lee, Hwalsuk},
  journal = {arXiv},
  title = {{CLEval: Character-Level Evaluation for Text Detection and Recognition Tasks}},
  year = {2020},
}
@inproceedings{Mou2020a,
  author = {Mou, Yongqiang and Tan, Lei and Yang, Hui and Chen, Jingying and Liu, Leyuan and Yan, Rui and Huang, Yaohong},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58555-6_10},
  pages = {158--174},
  series = {Lecture Notes in Computer Science},
  title = {{PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit}},
  year = {2020},
}
@inproceedings{Mou2020b,
  author = {Mou, Yongqiang and Tan, Lei and Yang, Hui and Chen, Jingying and Liu, Leyuan and Yan, Rui and Huang, Yaohong},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58555-6_10},
  pages = {158--174},
  series = {Lecture Notes in Computer Science},
  title = {{PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit}},
  year = {2020},
}
@inproceedings{Mou2020c,
  author = {Mou, Yongqiang and Tan, Lei and Yang, Hui and Chen, Jingying and Liu, Leyuan and Yan, Rui and Huang, Yaohong},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58555-6_10},
  pages = {158--174},
  series = {Lecture Notes in Computer Science},
  title = {{PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit}},
  year = {2020},
}
@article{Yu2020,
  author = {Yu, Deli and Li, Xuan and Zhang, Chengquan and Liu, Tao and Han, Junyu and Liu, Jingtuo and Ding, Errui},
  journal = {arXiv},
  title = {{Towards accurate scene text recognition with semantic reasoning networks}},
  year = {2020},
}
@inproceedings{Yuan2020,
  archivePrefix = {arXiv},
  arxivId = {1807.03326},
  author = {Yuan, Xiaoyong and He, Pan and Li, Xiaolin and Wu, Dapeng},
  booktitle = {IEEE INFOCOM 2020 - IEEE Conference on Computer Communications Workshops, INFOCOM WKSHPS 2020},
  doi = {10.1109/INFOCOMWKSHPS50562.2020.9162685},
  eprint = {1807.03326},
  isbn = {9781728186955},
  keywords = {Adversarial example,Deep learning,Multi-task learning,Scene text recognition},
  pages = {358--363},
  title = {{Adaptive adversarial attack on scene text recognition}},
  year = {2020},
}
@article{Chen2020,
  archivePrefix = {arXiv},
  arxivId = {2005.11487},
  author = {Chen, Yudi and Wang, Wei and Zhou, Yu and Yang, Fei and Yang, Dongbao and Wang, Weiping},
  eprint = {2005.11487},
  journal = {arXiv},
  title = {{Self-Training for Domain Adaptive Scene Text Detection}},
  year = {2020},
}
@inproceedings{Wang2020,
  archivePrefix = {arXiv},
  arxivId = {1911.07046},
  author = {Wang, Qitong and Zheng, Yi and Betke, Margrit},
  booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops},
  doi = {10.1109/CVPRW50498.2020.00278},
  eprint = {1911.07046},
  isbn = {9781728193601},
  issn = {21607516},
  pages = {2296--2305},
  title = {{A method for detecting text of arbitrary shapes in natural scenes that improves text spotting}},
  volume = {2020-June},
  year = {2020},
}
@article{Jiang2020,
  author = {Jiang, Xiufeng and Xu, Shugong and Zhang, Shunqing and Cao, Shan},
  doi = {10.1109/ACCESS.2020.2999069},
  issn = {21693536},
  journal = {IEEE Access},
  pages = {102106--102118},
  title = {{Arbitrary-Shaped Text Detection With Adaptive Text Region Representation}},
  volume = {8},
  year = {2020},
}
@article{Kong2020,
  author = {Kong, Tao and Sun, Fuchun and Liu, Huaping and Jiang, Yuning and Li, Lei and Shi, Jianbo},
  journal = {IEEE Transactions on Image Processing},
  pages = {7389--7398},
  title = {{FoveaBox: Beyond Anchor-Based Object Detection}},
  volume = {29},
  year = {2020},
}
@article{Wang2020a,
  archivePrefix = {arXiv},
  arxivId = {2008.04851},
  author = {Wang, Fangfang and Wu, Fei and Chen, Yifeng and Li, Xi},
  doi = {10.1145/3394171.3413819},
  eprint = {2008.04851},
  isbn = {9781450379885},
  journal = {arXiv},
  keywords = {Arbitrary-shaped Text Detection,Geometric Modeling},
  title = {{TextRay: Contour-based geometric modeling for arbitrary-shaped scene text detection}},
  year = {2020},
}
@inproceedings{Zhang2020a,
  archivePrefix = {arXiv},
  arxivId = {2002.03741},
  author = {Zhang, Liang and Liu, Yufei and Xiao, Hang and Yang, Lu and Zhu, Guangming and Shah, Syed Afaq and Bennamoun, Mohammed and Shen, Peiyi},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  doi = {10.1109/ICASSP40776.2020.9054213},
  eprint = {2002.03741},
  isbn = {9781509066315},
  issn = {15206149},
  keywords = {Scene text detection,multi-oriented text,textual attention tower},
  pages = {4272--4276},
  title = {{Efficient Scene Text Detection with Textual Attention Tower}},
  volume = {2020-May},
  year = {2020},
}
@inproceedings{Liu2020,
  author = {Liu, Xi and Zhou, Gaojing and Zhang, Rui and Wei, Xiaolin},
  booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops},
  doi = {10.1109/CVPRW50498.2020.00283},
  isbn = {9781728193601},
  issn = {21607516},
  pages = {2344--2352},
  title = {{An accurate segmentation-based scene text detector with context attention and repulsive text border}},
  volume = {2020-June},
  year = {2020},
}
@article{Qin2020,
  archivePrefix = {arXiv},
  arxivId = {2007.05113},
  author = {Qin, Xugong and Zhou, Yu and Wu, Dayan and Yue, Yinliang and Wang, Weiping},
  eprint = {2007.05113},
  journal = {arXiv},
  pages = {1--17},
  title = {{FC2RN: A Fully Convolutional Corner Refinement Network for Accurate Multi-Oriented Scene Text Detection}},
  year = {2020},
}
@misc{Wu2020,
  archivePrefix = {arXiv},
  arxivId = {2009.01766},
  author = {Wu, Weijia and Lu, Ning and Xie, Enze},
  eprint = {2009.01766},
  pages = {1--14},
  title = {{Synthetic-to-Real Unsupervised Domain Adaptation for Scene Text Detection in the Wild}},
  url = {http://arxiv.org/abs/2009.01766},
  year = {2020},
}
@article{Bagi2020,
  author = {Bagi, Randheer and Dutta, Tanima and Gupta, Hari Prabhat},
  doi = {10.1109/ACCESS.2020.3002808},
  issn = {21693536},
  journal = {IEEE Access},
  keywords = {Deep learning,noisy images,scene text detection,text recognition,text spotting},
  pages = {111433--111447},
  title = {{Cluttered TextSpotter: An End-to-End Trainable Light-Weight Scene Text Spotter for Cluttered Environment}},
  volume = {8},
  year = {2020},
}
@article{Zhang2020b,
  author = {Zhang, D A N},
  number = {1},
  pages = {7719--7730},
  title = {{RNTR-Net: A Robust Natural Text Recognition Network}},
  year = {2020},
}
@article{Agrahari2020,
  author = {Agrahari, Anurag and Ghosh, Rajib},
  doi = {10.1016/j.procs.2020.04.033},
  issn = {1877-0509},
  journal = {Procedia Computer Science},
  keywords = {MSER,Multi-oriented text detection,Natural scene image,Stroke width transform},
  pages = {322--330},
  publisher = {Elsevier B.V.},
  title = {{Multi-Oriented Text Detection in Natural Scene Images Based on the Intersection of MSER With the Locally Binarized Image}},
  url = {https://doi.org/10.1016/j.procs.2020.04.033},
  volume = {171},
  year = {2020},
}
@article{Xie2020,
  author = {Xie, Dong and Bailey, Colleen P},
  doi = {10.1117/12.2558206},
  internal-note = {Exported title was truncated to "learning algorithms"; the full title was reconstructed from memory and should be verified against the DOI.},
  keywords = {deep learning,optical character recognition,receipt recognition,text detection,text recognition},
  month = may,
  title = {{Novel receipt recognition with deep learning algorithms}},
  year = {2020},
}
@article{Venkateswarlu2020,
  author = {Venkateswarlu, Kethineni and Sudha, Nellore and Pavankumar, P},
  keywords = {HOG,NN,SVM,scene text},
  number = {7},
  pages = {106--115},
  title = {{Implementing HOG features to Recognize Multilingual Characters in Machine Learning}},
  volume = {9},
  year = {2020},
}
@article{Subedi2020,
  author = {Subedi, Bharat and Yunusov, Jahongir and Gaybulayev, Abdulaziz and Kim, Tae-hyong},
  doi = {10.14372/IEMEK.2020.15.2.51},
  issn = {1975-5066},
  journal = {IEMEK Journal of Embedded Systems and Applications},
  keywords = {Deep learning,Embedded systems,End-to-end approach,Low-cost implementation,Optical character recognition},
  number = {2},
  pages = {51--60},
  title = {{Development of a Low-cost Industrial OCR System with an End-to-end Deep Learning Technology}},
  volume = {15},
  year = {2020},
}
@article{Cheng2020,
  archivePrefix = {arXiv},
  arxivId = {2008.01300},
  author = {Cheng, Mengli and Wang, Chengyu and Hu, Xu and Huang, Jun and Wang, Xiaobo},
  eprint = {2008.01300},
  journal = {arXiv},
  keywords = {Automatic speech recognition,Massive video data,Optical character recognition,Weakly supervised learning},
  title = {{Weakly supervised construction of ASR systems with massive video data}},
  year = {2020},
}
@article{Zhang2020c,
  archivePrefix = {arXiv},
  arxivId = {2005.13118},
  author = {Zhang, Peng and Xu, Yunlu and Cheng, Zhanzhan and Pu, Shiliang and Lu, Jing and Qiao, Liang and Niu, Yi and Wu, Fei},
  doi = {10.1145/3394171.3413900},
  eprint = {2005.13118},
  isbn = {9781450379885},
  journal = {arXiv},
  keywords = {end-to-end,information extraction,text reading,visually rich},
  title = {{Trie: End-to-End text reading and information extraction for document understanding}},
  year = {2020},
}
@article{Zhang2020d,
  archivePrefix = {arXiv},
  arxivId = {2005.13117},
  author = {Zhang, Chengwei and Xu, Yunlu and Cheng, Zhanzhan and Pu, Shiliang and Niu, Yi and Wu, Fei and Zou, Futai},
  eprint = {2005.13117},
  journal = {arXiv},
  keywords = {Deep learning,Inner offset,Neural networks,Scene text recognition,Structure preserving},
  title = {{SPIN: Structure-preserving inner offset network for scene text recognition}},
  year = {2020},
}
@inproceedings{Xu2020,
  author = {Xu, Xing and Chen, Jiefu and Xiao, Jinhui and Gao, Lianli and Shen, Fumin and Shen, Heng Tao},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR42600.2020.01232},
  issn = {10636919},
  pages = {12301--12311},
  title = {{What Machines See Is Not What They Get: Fooling Scene Text Recognition Models with Adversarial Text Images}},
  year = {2020},
}
@phdthesis{Reul2020,
  author = {Reul, Christian},
  internal-note = {Type and school were supplied from memory (the export had no venue); verify before citing.},
  school = {Julius-Maximilians-Universit{\"a}t W{\"u}rzburg},
  title = {{An Intelligent Semi-Automatic Workflow for Optical Character Recognition of Historical Printings}},
  year = {2020},
}
@article{Wang2020b,
  archivePrefix = {arXiv},
  arxivId = {2001.09389},
  author = {Wang, Gang},
  eprint = {2001.09389},
  journal = {arXiv},
  title = {{Scene Text Recognition With Finer Grid Rectification}},
  year = {2020},
}
@article{Hu2020,
  archivePrefix = {arXiv},
  arxivId = {2002.01276},
  author = {Hu, Wenyang and Cai, Xiaocong and Hou, Jun and Yi, Shuai and Lin, Zhiping},
  doi = {10.1609/aaai.v34i07.6735},
  eprint = {2002.01276},
  issn = {2159-5399},
  journal = {arXiv},
  title = {{GTC: Guided training of CTC towards efficient and accurate scene text recognition}},
  year = {2020},
}
@inproceedings{Litman2020,
  archivePrefix = {arXiv},
  arxivId = {2003.11288},
  author = {Litman, Ron and Anschel, Oron and Tsiper, Shahar and Litman, Roee and Mazor, Shai and Manmatha, R.},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  doi = {10.1109/CVPR42600.2020.01198},
  eprint = {2003.11288},
  issn = {10636919},
  pages = {11959--11969},
  title = {{Scatter: Selective context attentional scene text recognizer}},
  year = {2020},
}
@article{He2020,
  archivePrefix = {arXiv},
  arxivId = {1703.06870},
  author = {He, Kaiming and Gkioxari, Georgia and Doll{\'{a}}r, Piotr and Girshick, Ross},
  doi = {10.1109/TPAMI.2018.2844175},
  eprint = {1703.06870},
  issn = {19393539},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  keywords = {Instance segmentation,convolutional neural network,object detection,pose estimation},
  number = {2},
  pages = {386--397},
  pmid = {29994331},
  title = {{Mask R-CNN}},
  volume = {42},
  year = {2020},
}
@article{Liao2020,
  archivePrefix = {arXiv},
  arxivId = {2007.09482},
  author = {Liao, Minghui and Pang, Guan and Huang, Jing and Hassner, Tal and Bai, Xiang},
  doi = {10.1007/978-3-030-58621-8_41},
  eprint = {2007.09482},
  journal = {arXiv},
  keywords = {Detection,Recognition,Scene text},
  title = {{Mask TextSpotter v3: Segmentation Proposal Network for Robust Scene Text Spotting}},
  year = {2020},
}
@article{Wang2020c,
  archivePrefix = {arXiv},
  arxivId = {2008.00714},
  author = {Wang, Wenhai and Liu, Xuebo and Ji, Xiaozhong and Xie, Enze and Liang, Ding and Yang, Zhibo and Lu, Tong and Shen, Chunhua and Luo, Ping},
  doi = {10.1007/978-3-030-58568-6_27},
  eprint = {2008.00714},
  isbn = {9783030585679},
  issn = {16113349},
  journal = {arXiv},
  keywords = {Text Detection,Text Detection Ambiguity,Text Recognition,Text Spotting},
  pages = {1--21},
  title = {{AE TextSpotter: Learning visual and linguistic representation for ambiguous text spotting}},
  year = {2020},
}
@article{Jiang2020a,
  archivePrefix = {arXiv},
  arxivId = {1905.05980},
  author = {Jiang, Xiufeng and Xu, Shugong and Zhang, Shunqing and Cao, Shan},
  doi = {10.1109/ACCESS.2020.2999069},
  eprint = {1905.05980},
  issn = {21693536},
  journal = {IEEE Access},
  keywords = {Scene text detection,arbitrary-shaped,deformable convolutional network,text region representation},
  pages = {102106--102118},
  title = {{Arbitrary-Shaped Text Detection with Adaptive Text Region Representation}},
  volume = {8},
  year = {2020},
}
@inproceedings{Baek2020b,
  archivePrefix = {arXiv},
  arxivId = {2007.09629},
  author = {Baek, Youngmin and Shin, Seung and Baek, Jeonghun and Park, Sungrae and Lee, Junyeop and Nam, Daehyun and Lee, Hwalsuk},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58526-6_30},
  eprint = {2007.09629},
  isbn = {9783030585259},
  issn = {16113349},
  keywords = {Character Region Attention,Optical character recognition (OCR),Scene text detection,Scene text recognition,Text spotting},
  pages = {504--521},
  series = {Lecture Notes in Computer Science},
  title = {{Character Region Attention for Text Spotting}},
  volume = {12374},
  year = {2020},
}
@inproceedings{Mou2020d,
  archivePrefix = {arXiv},
  arxivId = {1903.09837v1},
  author = {Mou, Yongqiang and Tan, Lei and Yang, Hui and Chen, Jingying and Liu, Leyuan and Yan, Rui and Huang, Yaohong},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58555-6_10},
  eprint = {1903.09837v1},
  internal-note = {The arXiv identifier predates the paper by over a year and may belong to a different work; verify.},
  pages = {158--174},
  series = {Lecture Notes in Computer Science},
  title = {{PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit}},
  year = {2020},
}
@inproceedings{Xiao2020a,
  author = {Xiao, Shanyu and Peng, Liangrui and Yan, Ruijie and An, Keyu and Yao, Gang and Min, Jaesik},
  booktitle = {Computer Vision -- ECCV 2020},
  doi = {10.1007/978-3-030-58526-6_7},
  isbn = {9783030585259},
  issn = {16113349},
  keywords = {Deep neural network,Scene text detection,Sequential deformation},
  pages = {108--124},
  series = {Lecture Notes in Computer Science},
  title = {{Sequential Deformation for Accurate Scene Text Detection}},
  volume = {12374},
  year = {2020},
}
@misc{Tian2020,
  archivePrefix = {arXiv},
  arxivId = {2010.15356},
  author = {Tian, Fukang and Wu, Haiyu and Xu, Bo},
  eprint = {2010.15356},
  title = {{Financial ticket intelligent recognition system based on deep learning}},
  url = {http://arxiv.org/abs/2010.15356},
  year = {2020},
}
@inproceedings{LouisMarch2016,
  archivePrefix = {arXiv},
  arxivId = {1904.01941},
  author = {Baek, Youngmin and Lee, Bado and Han, Dongyoon and Yun, Sangdoo and Lee, Hwalsuk},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  eprint = {1904.01941},
  internal-note = {The reference manager had attached the author, DOI, ISBN, journal and keywords of an unrelated neuroscience chapter to this title. Metadata was rebuilt from memory for the CVPR 2019 paper matching the title and page range; the citation key is kept for compatibility. Verify before citing.},
  pages = {9365--9374},
  title = {{Character Region Awareness for Text Detection}},
  year = {2019},
}
@inproceedings{Jaderberg2014,
  author = {Jaderberg, Max and Vedaldi, Andrea and Zisserman, Andrew},
  booktitle = {Computer Vision -- ECCV 2014},
  series = {Lecture Notes in Computer Science},
  title = {{Deep Features for Text Spotting}},
  year = {2014},
}
@inproceedings{Rong2017,
  author = {Rong, Xuejian and Yi, Chucai and Tian, Yingli},
  booktitle = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017},
  doi = {10.1109/CVPR.2017.349},
  isbn = {9781538604571},
  pages = {3279--3287},
  title = {{Unambiguous text localization and retrieval for cluttered scenes}},
  volume = {2017-January},
  year = {2017},
}
@inproceedings{Rong2017a,
  author = {Rong, Xuejian and Yi, Chucai and Tian, Yingli},
  booktitle = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017},
  doi = {10.1109/CVPR.2017.349},
  isbn = {9781538604571},
  pages = {3279--3287},
  title = {{Unambiguous text localization and retrieval for cluttered scenes}},
  volume = {2017-January},
  year = {2017},
}
@inproceedings{Tian2016,
  archivePrefix = {arXiv},
  arxivId = {1609.03605},
  author = {Tian, Zhi and Huang, Weilin and He, Tong and He, Pan and Qiao, Yu},
  booktitle = {Computer Vision -- ECCV 2016},
  doi = {10.1007/978-3-319-46484-8_4},
  eprint = {1609.03605},
  isbn = {9783319464831},
  issn = {16113349},
  keywords = {Anchor mechanism,Convolutional network,Recurrent neural network,Scene text detection},
  pages = {56--72},
  series = {Lecture Notes in Computer Science},
  title = {{Detecting text in natural image with connectionist text proposal network}},
  volume = {9912},
  year = {2016},
}
@inproceedings{Du2020,
  archivePrefix = {arXiv},
  arxivId = {1911.01051},
  author = {Du, Xiangcheng and Ma, Tianlong and Zheng, Yingbin and Ye, Hao and Wu, Xingjiao and He, Liang},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  doi = {10.1109/ICASSP40776.2020.9054269},
  eprint = {1911.01051},
  isbn = {9781509066315},
  issn = {15206149},
  keywords = {Scene text recognition,sequence model,temporal convolutions},
  pages = {2383--2387},
  title = {{Scene Text Recognition with Temporal Convolutional Encoder}},
  volume = {2020-May},
  year = {2020},
}
@misc{Yue2020,
  archivePrefix = {arXiv},
  arxivId = {2007.07542v2},
  author = {Yue, Xiaoyu and Kuang, Zhanghui and Lin, Chenhao and Sun, Hongbin},
  eprint = {2007.07542v2},
  title = {{RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition}},
  year = {2020},
}
@misc{Zhang2020e,
  archivePrefix = {arXiv},
  arxivId = {2009.06610v1},
  author = {Zhang, Chuhan and Zisserman, Andrew},
  eprint = {2009.06610v1},
  title = {{Adaptive Text Recognition through Visual Matching}},
  year = {2020},
}
@misc{Goodfellow2014,
  archivePrefix = {arXiv},
  arxivId = {1312.6082v4},
  author = {Goodfellow, Ian J and Bulatov, Yaroslav and Ibarz, Julian and Arnoud, Sacha and Shet, Vinay},
  eprint = {1312.6082v4},
  pages = {1--13},
  title = {{Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks}},
  year = {2014},
}
@misc{Zhong2016,
  archivePrefix = {arXiv},
  arxivId = {1605.07314v1},
  author = {Zhong, Zhuoyao and Jin, Lianwen and Zhang, Shuye and Feng, Ziyong},
  eprint = {1605.07314v1},
  pages = {1--12},
  title = {{DeepText: A Unified Framework for Text Proposal Generation and Text Detection in Natural Images}},
  year = {2016},
}
@misc{Liu2019e,
  archivePrefix = {arXiv},
  arxivId = {1904.00813v1},
  author = {Liu, Yuliang and Jin, Lianwen and Xie, Zecheng and Luo, Canjie and Zhang, Shuaitao and Xie, Lele},
  eprint = {1904.00813v1},
  title = {{Tightness-aware Evaluation Protocol for Scene Text Detection}},
  year = {2019},
}
@misc{Xu2016,
  archivePrefix = {arXiv},
  arxivId = {1611.06159v1},
  author = {Xu, Yan and Shan, Siyuan and Qiu, Ziming and Jia, Zhipeng and Shen, Zhengyang},
  eprint = {1611.06159v1},
  title = {{End-to-End Subtitle Detection and Recognition for Videos in East Asian Languages via CNN Ensemble with Near-Human-Level Performance}},
  year = {2016},
}
@misc{Nakamura2017,
  archivePrefix = {arXiv},
  arxivId = {1705.02772v1},
  author = {Nakamura, Toshiki and Zhu, Anna and Yanai, Keiji and Uchida, Seiichi},
  eprint = {1705.02772v1},
  title = {{Scene Text Eraser}},
  year = {2017},
}
@inproceedings{Wu2017a,
  author = {Wu, Yue and Natarajan, Prem},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  internal-note = {The export had parsed an affiliation street address as two extra authors and used the PDF file name as the title; title and venue were restored from memory and should be verified.},
  title = {{Self-Organized Text Detection with Minimal Post-Processing via Border Learning}},
  year = {2017},
}
@misc{Liu2018e,
  archivePrefix = {arXiv},
  arxivId = {1805.08365v1},
  author = {Liu, Zichuan and Lin, Guosheng and Yang, Sheng and Feng, Jiashi and Lin, Weisi and Goh, Wang Ling},
  eprint = {1805.08365v1},
  title = {{Learning Markov Clustering Networks for Scene Text Detection}},
  year = {2018},
}
@inproceedings{Wang2018b,
  author = {Wang, Fangfang and Zhao, Liming and Li, Xi and Wang, Xinchao and Tao, Dacheng},
  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  pages = {1381--1389},
  title = {{Geometry-Aware Scene Text Detection with Instance Transformation Network}},
  year = {2018},
}
% Image synthesis for training scene text detectors/recognisers.
% NOTE(review): no journal/booktitle is recorded for this entry; add the venue once confirmed.
@article{Zhan2018b,
  author = {Zhan, Fangneng and Lu, Shijian and Xue, Chuhui},
  keywords = {data augmentation,image synthesis,scene text detection,scene text recognition},
  pages = {1--18},
  title = {{Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes}},
  year = {2018}
}
% Sliding Line Point Regression: arXiv preprint 1801.09969v1 (no journal recorded).
@article{Zhu2018a,
  archivePrefix = {arXiv},
  arxivId = {1801.09969v1},
  author = {Zhu, Yixing and Du, Jun},
  eprint = {1801.09969v1},
  title = {{Sliding Line Point Regression for Shape Robust Scene Text Detection}},
  year = {2018}
}
% Robust text detection in natural scenes: arXiv preprint 1301.2628v3.
% Hyphenated given names are capitalised on both parts (Xu-Cheng, Hong-Wei).
@article{Yin2013,
  archivePrefix = {arXiv},
  arxivId = {1301.2628v3},
  author = {Yin, Xu-Cheng and Yin, Xuwang and Huang, Kaizhu and Hao, Hong-Wei},
  eprint = {1301.2628v3},
  pages = {1--10},
  title = {{Robust Text Detection in Natural Scene Images}},
  year = {2013}
}
% Review of text detection and recognition in the wild: arXiv preprint 2006.04305v2.
% The former last author "Jun, C V" was residue of the arXiv margin stamp
% (category and date), not a person; it has been replaced.
% NOTE(review): the last two authors were restored from memory of the arXiv listing; confirm.
% NOTE(review): pages 13--15 look implausible for a review article; verify or drop.
@article{Raisi2020,
  archivePrefix = {arXiv},
  arxivId = {2006.04305v2},
  author = {Raisi, Zobeir and Naiel, Mohamed A. and Fieguth, Paul and Wardell, Steven and Zelek, John},
  eprint = {2006.04305v2},
  keywords = {deep learning,text detection,text recognition,wild images},
  pages = {13--15},
  title = {{Text Detection and Recognition in the Wild: A Review}},
  year = {2020}
}
% UnitBox (IoU-loss object detector), ACM Multimedia 2016.
% Conference paper: stored as inproceedings with the proceedings title in booktitle
% (it was previously in the journal field of an article entry).
@inproceedings{Yu2016,
  archivePrefix = {arXiv},
  arxivId = {1608.01471},
  author = {Yu, Jiahui and Jiang, Yuning and Wang, Zhangyang and Cao, Zhimin and Huang, Thomas},
  booktitle = {MM 2016 - Proceedings of the 2016 ACM Multimedia Conference},
  doi = {10.1145/2964284.2967274},
  eprint = {1608.01471},
  isbn = {9781450336031},
  keywords = {Bounding Box Prediction,IoU Loss,Object Detection},
  pages = {516--520},
  title = {{UnitBox: An advanced object detection network}},
  year = {2016}
}
% Scale-robust oriented text detector, Pattern Recognition vol. 102 (article number 107180).
% Dropped the placeholder issue number "xxxx" and the url that merely repeated the DOI.
@article{Zheng2020,
  author = {Zheng, Yuqiang and Xie, Yuan and Qu, Yanyun and Yang, Xiaodong and Li, Cuihua and Zhang, Yan},
  doi = {10.1016/j.patcog.2019.107180},
  issn = {00313203},
  journal = {Pattern Recognition},
  keywords = {One-stage,Scale robust,Scene text detection},
  pages = {107180},
  publisher = {Elsevier Ltd},
  title = {{Scale robust deep oriented-text detection network}},
  volume = {102},
  year = {2020}
}
% Faster R-CNN (journal version).
% The previous author, journal, DOI, ISSN and pages described an unrelated book
% chapter ("A World Made for Money", B. Wallach), apparently from a bad metadata lookup.
% They are replaced with the bibliographic data matching the title.
% The citation key is kept unchanged so existing citations still resolve.
% NOTE(review): confirm volume/number/pages/DOI against the publisher record.
@article{WALLACH2017,
  author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
  doi = {10.1109/TPAMI.2016.2577031},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number = {6},
  pages = {1137--1149},
  title = {{Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}},
  volume = {39},
  year = {2017}
}
% EAST scene text detector.
% The previous author (listed twice), journal, DOI and pages described an unrelated
% book ("A World Elsewhere", S. Berkoff), apparently matched on the word "East".
% They are replaced with the data for the paper named in the title.
% The citation key is kept unchanged so existing citations still resolve.
% NOTE(review): year changed from 2019 to 2017 to match the paper; confirm, and add
% the conference venue and pages once verified.
@article{Berkoff2019,
  archivePrefix = {arXiv},
  arxivId = {1704.03155},
  author = {Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun},
  eprint = {1704.03155},
  title = {{EAST: An Efficient and Accurate Scene Text Detector}},
  year = {2017}
}
% Character recognition in natural images, VISAPP 2009.
% Conference paper: stored as inproceedings with the proceedings title in booktitle.
% Dropped number = "Visigrapp", which is not an issue number.
@inproceedings{DeCampos2009,
  author = {{De Campos}, Te{\'{o}}filo E. and Babu, Bodla Rakesh and Varma, Manik},
  booktitle = {VISAPP 2009 - Proceedings of the 4th International Conference on Computer Vision Theory and Applications},
  doi = {10.5220/0001770102730280},
  isbn = {9789898111692},
  keywords = {Camera-based character recognition,Digits,Kannada characters,Latin characters,Object recognition,Off-line handwritten character recognition},
  pages = {273--280},
  title = {{Character recognition in natural images}},
  volume = {2},
  year = {2009}
}
% Lexicographic text boxes (Gouws and Prinsloo).
% The raw Unicode en dash in the title is written as "--" so classic 8-bit BibTeX handles it.
% NOTE(review): no journal/booktitle is recorded; the topic (lexicography) looks
% unrelated to the rest of this OCR list - confirm the entry belongs here.
@article{Gouws2010,
  author = {Gouws, Rufus H. and Prinsloo, Danie J.},
  pages = {501--511},
  title = {{Thinking out of the box -- perspectives on the use of lexicographic text boxes}},
  year = {2010}
}
% Segmentation-free scene text reading applied to utility meters.
% Title spacing repaired (extraction had split the apostrophe, colon and periods);
% the placeholder ISBN 0123456789 and the stray leading hyphen in keywords were removed.
% NOTE(review): only one author is recorded and no venue; the author list may be
% incomplete - verify against the publication.
@article{Karatzas2018,
  author = {Karatzas, Dimosthenis},
  keywords = {robust reading,cnn,end-to-end systems},
  title = {{Cutting Sayre's Knot: Reading Scene Text without Segmentation. Application to Utility Meters}},
  year = {2018}
}
% End-to-end text recognition with CNNs.
% Added the co-author missing from the exported list and periods after initials.
% NOTE(review): no journal/booktitle is recorded; add the venue once confirmed.
@article{Wang2012,
  author = {Wang, Tao and Wu, David J. and Coates, Adam and Ng, Andrew Y.},
  title = {{End-to-End Text Recognition with Convolutional Neural Networks}},
  year = {2012}
}
% Lecture video / slide synchronisation via video text analysis, ACM Multimedia.
% Conference paper: stored as inproceedings with the proceedings title in booktitle.
@inproceedings{Wang2003,
  author = {Wang, Feng and Ngo, Chong Wah and Pong, Ting Chuen},
  booktitle = {Proceedings of the ACM International Multimedia Conference and Exhibition},
  doi = {10.1145/957013.957080},
  isbn = {1581137222},
  keywords = {Lecture videos,Synchronization,Video text analysis},
  pages = {315--318},
  title = {{Synchronization of lecture videos and electronic slides by video text analysis}},
  year = {2003}
}
% OCR for classical Indic documents.
% The venue string names a conference, so it is stored in booktitle of an inproceedings entry.
% NOTE(review): "CVPR2020" is kept verbatim; replace with the full proceedings
% (or workshop) title once confirmed.
@inproceedings{Dwivedi2020,
  author = {Dwivedi, Agam and Sarvadevabhatla, Ravi Kiran},
  booktitle = {CVPR2020},
  title = {{An OCR for Classical Indic Documents Containing Arbitrarily Long Words}},
  year = {2020}
}
% SVHN dataset paper (street-view house-number digits), NIPS 2011 workshop.
% Workshop paper: stored as inproceedings with the workshop name in booktitle.
% Dropped issn, pmid, volume and number: these do not apply to a workshop paper and
% appear to come from a mismatched metadata lookup.
% NOTE(review): pages 9--13 may come from the same bad lookup; verify or drop.
@inproceedings{Netzer2011,
  author = {Netzer, Yuval and Wang, Tao and Coates, Adam and Bissacco, Alessandro and Wu, Bo and Ng, Andrew Y.},
  booktitle = {NIPS Workshop on Deep Learning and Unsupervised Feature Learning},
  pages = {9--13},
  title = {{Reading Digits in Natural Images with Unsupervised Feature Learning}},
  url = {http://ufldl.stanford.edu/housenumbers/nips2011{\_}housenumbers.pdf},
  year = {2011}
}