<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Discrete and Continuous Models and Applied Computational Science</journal-id><journal-title-group><journal-title xml:lang="en">Discrete and Continuous Models and Applied Computational Science</journal-title><trans-title-group xml:lang="ru"><trans-title>Discrete and Continuous Models and Applied Computational Science</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2658-4670</issn><issn publication-format="electronic">2658-7149</issn><publisher><publisher-name xml:lang="en">Peoples' Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">43666</article-id><article-id pub-id-type="doi">10.22363/2658-4670-2024-32-4-370-379</article-id><article-id pub-id-type="edn">EPGKRU</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Computer Science</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Информатика и вычислительная техника</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">MMEmAsis: multimodal emotion and sentiment analysis</article-title><trans-title-group xml:lang="ru"><trans-title>ММЕмАсис: мультимодальный метод оценки психофизиологического состояния человека</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-9231-8662</contrib-id><contrib-id contrib-id-type="scopus">57195683637</contrib-id><contrib-id contrib-id-type="researcherid">Y-6971-2018</contrib-id><name-alternatives><name xml:lang="en"><surname>Kiselev</surname><given-names>Gleb A.</given-names></name><name xml:lang="ru"><surname>Киселёв</surname><given-names>Г. А.</given-names></name></name-alternatives><bio xml:lang="en"><p>Candidate of Technical Sciences, Senior Lecturer at the Department of Mathematical Modeling and Artificial Intelligence of RUDN University; Researcher of Federal Research Center “Computer Science and Control” of the Russian Academy of Sciences</p></bio><email>kiselev@isa.ru</email><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6280-6040</contrib-id><name-alternatives><name xml:lang="en"><surname>Lubysheva</surname><given-names>Yaroslava M.</given-names></name><name xml:lang="ru"><surname>Лубышева</surname><given-names>Я. М.</given-names></name></name-alternatives><bio xml:lang="en"><p>Master’s degree student of Department of Mathematical Modeling and Artificial Intelligence</p></bio><email>gorbunova_y_m@mail.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-2787-0714</contrib-id><name-alternatives><name xml:lang="en"><surname>Weizenfeld</surname><given-names>Daniil A.</given-names></name><name xml:lang="ru"><surname>Вейценфельд</surname><given-names>Д. 
А.</given-names></name></name-alternatives><bio xml:lang="en"><p>Master’s degree student at the Department of Mechanics and Control Processes</p></bio><email>veicenfeld@isa.ru</email><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">RUDN University</institution></aff><aff><institution xml:lang="ru">Российский университет дружбы народов</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Federal Research Center “Computer Science and Control” of the Russian Academy of Sciences</institution></aff><aff><institution xml:lang="ru">Федеральный исследовательский центр «Информатика и управление» Российской академии наук</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2024-12-15" publication-format="electronic"><day>15</day><month>12</month><year>2024</year></pub-date><volume>32</volume><issue>4</issue><issue-title xml:lang="en">VOL 32, NO4 (2024)</issue-title><issue-title xml:lang="ru">ТОМ 32, №4 (2024)</issue-title><fpage>370</fpage><lpage>379</lpage><history><date date-type="received" iso-8601-date="2025-04-05"><day>05</day><month>04</month><year>2025</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2024, Kiselev G.A., Lubysheva Y.M., Weizenfeld D.A.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2024, Киселёв Г.А., Лубышева Я.М., Вейценфельд Д.А.</copyright-statement><copyright-year>2024</copyright-year><copyright-holder xml:lang="en">Kiselev G.A., Lubysheva Y.M., Weizenfeld D.A.</copyright-holder><copyright-holder xml:lang="ru">Киселёв Г.А., Лубышева Я.М., Вейценфельд Д.А.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/miph/article/view/43666">https://journals.rudn.ru/miph/article/view/43666</self-uri><abstract xml:lang="en"><p>The paper presents a new multimodal approach to analyzing the psycho-emotional state of a person using nonlinear classifiers. The main modalities are the subject’s speech data and video data of facial expressions. Speech is digitized and transcribed using the Pisets (Scribe) library, and then mood cues are extracted using the Titanis sentiment analyzer from the FRC CSC RAS. For visual analysis, two different approaches were implemented: a pre-trained ResNet model for direct sentiment classification from facial expressions, and a deep learning model that integrates ResNet with a graph-based deep neural network for facial recognition. Both approaches faced challenges related to environmental factors affecting the stability of the results. The second approach demonstrated greater flexibility with adjustable classification vocabularies, which facilitated post-deployment calibration. Integration of text and visual data has significantly improved the accuracy and reliability of the analysis of a person’s psycho-emotional state.</p></abstract><trans-abstract xml:lang="ru"><p>В статье представлен новый мультимодальный подход анализа психоэмоционального состояния человека с помощью нелинейных классификаторов. Основными модальностями являются данные речи испытуемого и видеоданные мимики. Речь оцифровывается и транскрибируется библиотекой Писец, признаки настроения извлекаются системой Titanis от ФИЦ ИУ РАН. 
Для визуального анализа были реализованы два различных подхода: дообученная модель ResNet для прямой классификации настроений по выражениям лица и модель глубокого обучения, интегрирующая ResNet с основанной на графах глубокой нейронной сетью для распознавания мимических признаков. Оба подхода сталкивались с трудностями, связанными с факторами окружающей среды, влияющими на стабильность результатов. Второй подход продемонстрировал бóльшую гибкость благодаря регулируемым словарям классификации, что облегчало калибровку после развёртывания. Интеграция текстовых и визуальных данных значительно улучшила точность и надёжность анализа психоэмоционального состояния человека.</p></trans-abstract><kwd-group xml:lang="en"><kwd>dataset</kwd><kwd>emotion analysis</kwd><kwd>multimodal data mining</kwd><kwd>artificial intelligence</kwd><kwd>machine learning</kwd><kwd>deep learning</kwd><kwd>neuroscience data mining</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>набор данных</kwd><kwd>анализ эмоций</kwd><kwd>мультимодальный анализ данных</kwd><kwd>искусственный интеллект</kwd><kwd>машинное обучение</kwd><kwd>глубокое обучение</kwd><kwd>анализ нейрофизиологических данных</kwd></kwd-group><funding-group><funding-statement xml:lang="en">This paper has been supported by the RUDN University Strategic Academic Leadership Program</funding-statement></funding-group></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Piana, S., Staglianò, A., Odone, F., Verri, A. &amp; Camurri, A. Real-time Automatic Emotion Recognition from Body Gestures 2014. doi:10.48550/arXiv.1402.5047.</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Hu, G., Lin, T., Zhao, Y., Lu, G., Wu, Y. &amp; Li, Y. UniMSE: Towards Unified Multimodal Sentiment Analysis and Emotion Recognition. Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. doi:10.48550/arXiv.2211.11256 (2022).</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Zhao, J., Zhang, T., Hu, J., Liu, Y., Jin, Q., Wang, X. &amp; Li, H. M3ED: Multi-modal Multi-scene Multi-label Emotional Dialogue Database in Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (Association for Computational Linguistics, Dublin, Ireland, May 2022), 5699-5710. doi:10.18653/v1/2022.acl-long.391.</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Poria, S., Hazarika, D., Majumder, N., Naik, G., Cambria, E. &amp; Mihalcea, R. MELD: A Multimodal Multi-Party Dataset for Emotion Recognition in Conversations. Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. doi:10.48550/arXiv.1810.02508 (2018).</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Ekman, P. Emotion: common characteristics and individual differences. Lecture presented at the 8th World Congress of I.O.P., Tampere, Finland (1996).</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Levenson, R. W. The intrapersonal functions of emotion. Cognition &amp; Emotion 13, 481-504 (1999).</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Keltner, D. &amp; Gross, J. Functional accounts of emotions. Cognition &amp; Emotion 13, 467-480 (1999).</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Ferdous, A., Bari, A. &amp; Gavrilova, M. Emotion Recognition From Body Movement. IEEE Access. doi:10.1109/ACCESS.2019.2963113 (Dec. 
2019).</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Zadeh, A., Liang, P., Poria, S., Cambria, E. &amp; Morency, L.-P. Multimodal Language Analysis in the Wild: CMU-MOSEI Dataset and Interpretable Dynamic Fusion Graph in Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (July 2018), 2236-2246. doi:10.18653/v1/P18-1208.</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Busso, C., Bulut, M., Lee, C. et al. IEMOCAP: interactive emotional dyadic motion capture database. Lang Resources &amp; Evaluation 42, 335-359. doi:10.1007/s10579-008-9076-6 (2008).</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Kossaifi, J. et al. SEWA DB: A Rich Database for Audio-Visual Emotion and Sentiment Research in the Wild. IEEE Transactions on Pattern Analysis and Machine Intelligence 13. doi:10.1109/TPAMI.2019.2944808 (Oct. 2019).</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>O’Reilly, H., Pigat, D., Fridenson, S., Berggren, S., Tal, S., Golan, O., Bölte, S., Baron-Cohen, S. &amp; Lundqvist, D. The EU-Emotion Stimulus Set: A validation study. Behav Res Methods 48, 567-576. doi:10.3758/s13428-015-0601-4 (2016).</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Soleymani, M., Lichtenauer, J., Pun, T. &amp; Pantic, M. A Multimodal Database for Affect Recognition and Implicit Tagging. IEEE Transactions on Affective Computing 3, 42-55. doi:10.1109/T-AFFC.2011.25 (2012).</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Chou, H. C., Lin, W. C., Chang, L. C., Li, C. C., Ma, H. P. &amp; Lee, C. C. NNIME: The NTHU-NTUA Chinese interactive multimodal emotion corpus in 2017 Seventh International Conference on Affective Computing and Intelligent Interaction (ACII) (2017), 292-298. doi:10.1109/ACII.2017.8273615.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Ringeval, F., Sonderegger, A., Sauer, J. &amp; Lalanne, D. Introducing the RECOLA multimodal corpus of remote collaborative and affective interactions in 2013 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) (2013), 1-8. doi:10.1109/FG.2013.6553805.</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Reznikova, J. I. Intelligence and language in animals and humans 253 pp. (Yurayt, 2016).</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Samokhvalov, V. P., Kornetov, A. N., Korobov, A. A. &amp; Kornetov, N. A. Ethology in psychiatry 217 pp. (Health, 1990).</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Gullett, N., Zajkowska, Z., Walsh, A., Harper, R. &amp; Mondelli, V. Heart rate variability (HRV) as a way to understand associations between the autonomic nervous system (ANS) and affective states: A critical review of the literature. International Journal of Psychophysiology 192, 35-42. doi:10.1016/j.ijpsycho.2023.08.001 (2023).</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Bondarenko, I. Pisets: A Python library and service for automatic speech recognition and transcribing in Russian and English https://github.com/bond005/pisets.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Savchenko, A. V. 
Facial expression and attributes recognition based on multi-task learning of lightweight neural networks in 2021 IEEE 19th International Symposium on Intelligent Systems and Informatics (SISY) (2021), 119-124.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Luo, C., Song, S., Xie, W., Shen, L. &amp; Gunes, H. Learning multi-dimensional edge feature-based AU relation graph for facial action unit recognition. arXiv preprint arXiv:2205.01782 (2022).</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Gajarsky, T. Facetorch: A Python library for analysing faces using PyTorch https://github.com/tomasgajarsky/facetorch.</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Deng, J., Guo, J., Ververas, E., Kotsia, I. &amp; Zafeiriou, S. RetinaFace: Single-shot multi-level face localisation in the wild in Proceedings of the IEEE/CVF conference on computer vision and pattern recognition (2020), 5203-5212.</mixed-citation></ref></ref-list></back></article>
