diff --git a/Fazeli_Shahroudi-Sepehr-Mastersthesis.bbl b/Fazeli_Shahroudi-Sepehr-Mastersthesis.bbl index 65456746d19ed3be1137a159132ea399f31520f0..573d9c713990c6ac5c9215dd196d092c97d9f0d6 100644 --- a/Fazeli_Shahroudi-Sepehr-Mastersthesis.bbl +++ b/Fazeli_Shahroudi-Sepehr-Mastersthesis.bbl @@ -1,5 +1,5 @@ % Generated by IEEEtran.bst, version: 1.14 (2015/08/26) -\begin{thebibliography}{1} +\begin{thebibliography}{10} \providecommand{\url}[1]{#1} \csname url@samestyle\endcsname \providecommand{\newblock}{\relax} @@ -21,6 +21,175 @@ \providecommand{\BIBdecl}{\relax} \BIBdecl +\bibitem{gonzalez_digital_2008-1} +R.~C. Gonzalez and R.~E. Woods, \emph{\BIBforeignlanguage{en}{Digital {Image} + {Processing}}}.\hskip 1em plus 0.5em minus 0.4em\relax Prentice Hall, 2008, + google-Books-ID: 8uGOnjRGEzoC. + +\bibitem{jain_fundamentals_1989-1} +\BIBentryALTinterwordspacing +A.~K. Jain, \emph{\BIBforeignlanguage{eng}{Fundamentals of digital image + processing}}.\hskip 1em plus 0.5em minus 0.4em\relax Englewood Cliffs, NJ : + Prentice Hall, 1989. [Online]. Available: + \url{http://archive.org/details/fundamentalsofdi0000jain} +\BIBentrySTDinterwordspacing + +\bibitem{russ_image_2016} +J.~C. Russ, \emph{\BIBforeignlanguage{en}{The {Image} {Processing} + {Handbook}}}.\hskip 1em plus 0.5em minus 0.4em\relax CRC Press, Apr. 2016, + google-Books-ID: gxXXRJWfEsoC. + +\bibitem{szeliski_introduction_2022} +\BIBentryALTinterwordspacing +R.~Szeliski, ``\BIBforeignlanguage{en}{Introduction},'' in + \emph{\BIBforeignlanguage{en}{Computer {Vision}: {Algorithms} and + {Applications}}}, R.~Szeliski, Ed.\hskip 1em plus 0.5em minus 0.4em\relax + Cham: Springer International Publishing, 2022, pp. 1--26. [Online]. + Available: \url{https://doi.org/10.1007/978-3-030-34372-9_1} +\BIBentrySTDinterwordspacing + +\bibitem{goodfellow_deep_2016} +I.~Goodfellow, Y.~Bengio, and A.~Courville, \emph{\BIBforeignlanguage{en}{Deep + {Learning}}}.\hskip 1em plus 0.5em minus 0.4em\relax MIT Press, Nov. 
2016, + google-Books-ID: Np9SDQAAQBAJ. + +\bibitem{bradski_learning_2008} +\BIBentryALTinterwordspacing +G.~R. Bradski, \emph{\BIBforeignlanguage{eng}{Learning {OpenCV} : computer + vision with the {OpenCV} library}}.\hskip 1em plus 0.5em minus 0.4em\relax + Sebastopol, CA : O'Reilly, 2008. [Online]. Available: + \url{http://archive.org/details/learningopencvco0000brad} +\BIBentrySTDinterwordspacing + +\bibitem{cooley_algorithm_1965} +\BIBentryALTinterwordspacing +J.~W. Cooley and J.~W. Tukey, ``An {Algorithm} for the {Machine} {Calculation} + of {Complex} {Fourier} {Series},'' \emph{Mathematics of Computation}, + vol.~19, no.~90, pp. 297--301, 1965, publisher: American Mathematical + Society. [Online]. Available: \url{https://www.jstor.org/stable/2003354} +\BIBentrySTDinterwordspacing + +\bibitem{hounsfield_computerized_1973} +\BIBentryALTinterwordspacing +G.~N. Hounsfield, ``Computerized transverse axial scanning (tomography): {Part} + 1. {Description} of system,'' \emph{British Journal of Radiology}, vol.~46, + no. 552, pp. 1016--1022, Dec. 1973. [Online]. Available: + \url{https://doi.org/10.1259/0007-1285-46-552-1016} +\BIBentrySTDinterwordspacing + +\bibitem{lecun_deep_2015} +Y.~LeCun, Y.~Bengio, and G.~Hinton, ``Deep learning,'' \emph{Nature}, vol. 521, + no. 7553, pp. 436--444, 2015, place: United Kingdom Publisher: Nature + Publishing Group. + +\bibitem{hinton_improving_2012} +\BIBentryALTinterwordspacing +G.~E. Hinton, N.~Srivastava, A.~Krizhevsky, I.~Sutskever, and R.~R. + Salakhutdinov, ``Improving neural networks by preventing co-adaptation of + feature detectors,'' Jul. 2012, arXiv:1207.0580 [cs]. [Online]. 
Available: + \url{http://arxiv.org/abs/1207.0580} +\BIBentrySTDinterwordspacing + +\bibitem{zhang_efficient_2023} +\BIBentryALTinterwordspacing +D.~Zhang, X.~Hao, D.~Wang, C.~Qin, B.~Zhao, L.~Liang, and W.~Liu, + ``\BIBforeignlanguage{en}{An efficient lightweight convolutional neural + network for industrial surface defect detection},'' + \emph{\BIBforeignlanguage{en}{Artificial Intelligence Review}}, vol.~56, + no.~9, pp. 10\,651--10\,677, Sep. 2023. [Online]. Available: + \url{https://doi.org/10.1007/s10462-023-10438-y} +\BIBentrySTDinterwordspacing + +\bibitem{litjens_survey_2017} +\BIBentryALTinterwordspacing +G.~Litjens, T.~Kooi, B.~E. Bejnordi, A.~A.~A. Setio, F.~Ciompi, M.~Ghafoorian, + J.~A. W. M. v.~d. Laak, B.~v. Ginneken, and C.~I. Sánchez, ``A {Survey} on + {Deep} {Learning} in {Medical} {Image} {Analysis},'' \emph{Medical Image + Analysis}, vol.~42, pp. 60--88, Dec. 2017, arXiv:1702.05747 [cs]. [Online]. + Available: \url{http://arxiv.org/abs/1702.05747} +\BIBentrySTDinterwordspacing + +\bibitem{maimaitijiang_soybean_2020} +\BIBentryALTinterwordspacing +M.~Maimaitijiang, ``Soybean yield prediction from {UAV} using multimodal data + fusion and deep learning,'' \emph{Remote Sensing of Environment}, Jan. 2020. + [Online]. Available: + \url{https://www.academia.edu/84238554/Soybean_yield_prediction_from_UAV_using_multimodal_data_fusion_and_deep_learning} +\BIBentrySTDinterwordspacing + +\bibitem{janai_computer_2021} +\BIBentryALTinterwordspacing +J.~Janai, F.~Güney, A.~Behl, and A.~Geiger, ``Computer {Vision} for + {Autonomous} {Vehicles}: {Problems}, {Datasets} and {State} of the {Art},'' + Mar. 2021, arXiv:1704.05519 [cs]. [Online]. Available: + \url{http://arxiv.org/abs/1704.05519} +\BIBentrySTDinterwordspacing + +\bibitem{ren_faster_2016} +\BIBentryALTinterwordspacing +S.~Ren, K.~He, R.~Girshick, and J.~Sun, ``Faster {R}-{CNN}: {Towards} + {Real}-{Time} {Object} {Detection} with {Region} {Proposal} {Networks},'' + Jan. 2016, arXiv:1506.01497 [cs]. 
[Online]. Available: + \url{http://arxiv.org/abs/1506.01497} +\BIBentrySTDinterwordspacing + +\bibitem{ragan-kelley_halide_2013} +J.~Ragan-Kelley, C.~Barnes, A.~Adams, S.~Paris, F.~Durand, and S.~Amarasinghe, + \emph{Halide: {A} {Language} and {Compiler} for {Optimizing} {Parallelism}, + {Locality}, and {Recomputation} in {Image} {Processing} {Pipelines}}, Jun. + 2013, vol.~48, journal Abbreviation: ACM SIGPLAN Notices Pages: 530 + Publication Title: ACM SIGPLAN Notices. + +\bibitem{szeliski_image_2022} +\BIBentryALTinterwordspacing +R.~Szeliski, ``\BIBforeignlanguage{en}{Image {Processing}},'' in + \emph{\BIBforeignlanguage{en}{Computer {Vision}: {Algorithms} and + {Applications}}}, R.~Szeliski, Ed.\hskip 1em plus 0.5em minus 0.4em\relax + Cham: Springer International Publishing, 2022, pp. 85--151. [Online]. + Available: \url{https://doi.org/10.1007/978-3-030-34372-9_3} +\BIBentrySTDinterwordspacing + +\bibitem{russell_artificial_2016} +\BIBentryALTinterwordspacing +S.~J. Russell, P.~Norvig, E.~Davis, and D.~Edwards, \emph{Artificial + intelligence a modern approach}, third edition, global edition~ed.\hskip 1em + plus 0.5em minus 0.4em\relax Boston: Pearson, 2016. [Online]. Available: + \url{http://www.gbv.de/dms/tib-ub-hannover/848811429.pdf} +\BIBentrySTDinterwordspacing + +\bibitem{kulpa_universal_1981} +Z.~Kulpa, ``\BIBforeignlanguage{en}{Universal digital image processing systems + in europe — {A} comparative survey},'' in + \emph{\BIBforeignlanguage{en}{Digital {Image} {Processing} {Systems}}}, + L.~Bloc and Z.~Kulpa, Eds.\hskip 1em plus 0.5em minus 0.4em\relax Berlin, + Heidelberg: Springer, 1981, pp. 1--20. + +\bibitem{sahebi_distributed_2023} +\BIBentryALTinterwordspacing +A.~Sahebi, M.~Barbone, M.~Procaccini, W.~Luk, G.~Gaydadjiev, and R.~Giorgi, + ``Distributed large-scale graph processing on {FPGAs},'' \emph{Journal of Big + Data}, vol.~10, no.~1, p.~95, Jun. 2023. [Online]. 
Available:
+  \url{https://doi.org/10.1186/s40537-023-00756-x}
+\BIBentrySTDinterwordspacing
+
+\bibitem{ma_new_2024}
+\BIBentryALTinterwordspacing
+X.~Ma, Y.~Jiang, H.~Liu, C.~Zhou, and K.~Gu, ``A {New} {Image} {Quality}
+  {Database} for {Multiple} {Industrial} {Processes},'' Feb. 2024,
+  arXiv:2401.13956 [cs]. [Online]. Available:
+  \url{http://arxiv.org/abs/2401.13956}
+\BIBentrySTDinterwordspacing
+
+\bibitem{chisholm_fpga-based_2020}
+\BIBentryALTinterwordspacing
+T.~Chisholm, R.~Lins, and S.~Givigi, ``{FPGA}-{Based} {Design} for
+  {Real}-{Time} {Crack} {Detection} {Based} on {Particle} {Filter},''
+  \emph{IEEE Transactions on Industrial Informatics}, vol.~16, no.~9, pp.
+  5703--5711, Sep. 2020, conference Name: IEEE Transactions on Industrial
+  Informatics. [Online]. Available:
+  \url{https://ieeexplore.ieee.org/document/8888239}
+\BIBentrySTDinterwordspacing
+
 \bibitem{ferreira_generic_2024}
 D.~Ferreira, F.~Moutinho, J.~P. Matos-Carvalho, M.~Guedes, and P.~Deusdado,
   ``\BIBforeignlanguage{eng}{Generic {FPGA} {Pre}-{Processing} {Image}
@@ -28,4 +197,104 @@ D.~Ferreira, F.~Moutinho, J.~P. Matos-Carvalho, M.~Guedes, and P.~Deusdado,
   \emph{\BIBforeignlanguage{eng}{Sensors (Basel, Switzerland)}}, vol.~24,
   no.~18, p. 6101, Sep. 2024.
 
+\bibitem{lai_image_2001}
+B.-C. Lai, {Phillip}, and P.~McKerrow, ``Image {Processing} {Libraries},'' Jan.
+  2001.
+
+\bibitem{perez_super-resolution_2014}
+\BIBentryALTinterwordspacing
+J.~Pérez, E.~Magdaleno, F.~Pérez, M.~Rodríguez, D.~Hernández, and
+  J.~Corrales, ``\BIBforeignlanguage{en}{Super-{Resolution} in {Plenoptic}
+  {Cameras} {Using} {FPGAs}},'' \emph{\BIBforeignlanguage{en}{Sensors}},
+  vol.~14, no.~5, pp. 8669--8685, May 2014, number: 5 Publisher:
+  Multidisciplinary Digital Publishing Institute. [Online]. Available:
+  \url{https://www.mdpi.com/1424-8220/14/5/8669}
+\BIBentrySTDinterwordspacing
+
+\bibitem{rao_comparative_2023}
+\BIBentryALTinterwordspacing
+M.~N. 
Rao, ``\BIBforeignlanguage{en}{A {Comparative} {Analysis} of {Deep} + {Learning} {Frameworks} and {Libraries}},'' + \emph{\BIBforeignlanguage{en}{International Journal of Intelligent Systems + and Applications in Engineering}}, vol.~11, no.~2s, pp. 337--342, Jan. 2023, + number: 2s. [Online]. Available: + \url{https://ijisae.org/index.php/IJISAE/article/view/2707} +\BIBentrySTDinterwordspacing + +\bibitem{ciora_industrial_2014} +\BIBentryALTinterwordspacing +R.~A. Ciora and C.~M. Simion, ``\BIBforeignlanguage{en}{Industrial + {Applications} of {Image} {Processing}},'' \emph{\BIBforeignlanguage{en}{ACTA + Universitatis Cibiniensis}}, vol.~64, no.~1, pp. 17--21, Nov. 2014. [Online]. + Available: \url{https://www.sciendo.com/article/10.2478/aucts-2014-0004} +\BIBentrySTDinterwordspacing + +\bibitem{sandvik_comparative_2024} +\BIBentryALTinterwordspacing +Y.~J. Sandvik, C.~M. Futsæther, K.~H. Liland, and O.~Tomic, + ``\BIBforeignlanguage{en}{A {Comparative} {Literature} {Review} of {Machine} + {Learning} and {Image} {Processing} {Techniques} {Used} for {Scaling} and + {Grading} of {Wood} {Logs}},'' \emph{\BIBforeignlanguage{en}{Forests}}, + vol.~15, no.~7, p. 1243, Jul. 2024, number: 7 Publisher: Multidisciplinary + Digital Publishing Institute. [Online]. Available: + \url{https://www.mdpi.com/1999-4907/15/7/1243} +\BIBentrySTDinterwordspacing + +\bibitem{sardar_role_2012} +\BIBentryALTinterwordspacing +H.~Sardar, ``A role of computer system for comparative analysis using image + processing to promote agriculture business,'' \emph{International journal of + engineering research and technology}, Nov. 2012. [Online]. Available: + \url{https://www.semanticscholar.org/paper/A-role-of-computer-system-for-comparative-analysis-Sardar/6e2fd48a1025b68951f511abe05f8451f753eb47} +\BIBentrySTDinterwordspacing + +\bibitem{vieira_performance_2024} +\BIBentryALTinterwordspacing +R.~Vieira, D.~Silva, E.~Ribeiro, L.~Perdigoto, and P.~J. 
Coelho, + ``\BIBforeignlanguage{en}{Performance {Evaluation} of {Computer} {Vision} + {Algorithms} in a {Programmable} {Logic} {Controller}: {An} {Industrial} + {Case} {Study}},'' \emph{\BIBforeignlanguage{en}{Sensors}}, vol.~24, no.~3, + p. 843, Jan. 2024, number: 3 Publisher: Multidisciplinary Digital Publishing + Institute. [Online]. Available: \url{https://www.mdpi.com/1424-8220/24/3/843} +\BIBentrySTDinterwordspacing + +\bibitem{wu_precision_2022} +\BIBentryALTinterwordspacing +S.~Wu, H.~Yang, X.~Liu, and R.~Jia, ``\BIBforeignlanguage{English}{Precision + control of polyurethane filament drafting and winding based on machine + vision},'' \emph{\BIBforeignlanguage{English}{Frontiers in Bioengineering and + Biotechnology}}, vol.~10, Sep. 2022, publisher: Frontiers. [Online]. + Available: + \url{https://www.frontiersin.org/journals/bioengineering-and-biotechnology/articles/10.3389/fbioe.2022.978212/full} +\BIBentrySTDinterwordspacing + +\bibitem{zhu_machine_2022} +\BIBentryALTinterwordspacing +Q.~Zhu, Y.~Zhang, J.~Luan, and L.~Hu, ``\BIBforeignlanguage{en}{A {Machine} + {Vision} {Development} {Framework} for {Product} {Appearance} {Quality} + {Inspection}},'' \emph{\BIBforeignlanguage{en}{Applied Sciences}}, vol.~12, + no.~22, p. 11565, Jan. 2022, number: 22 Publisher: Multidisciplinary Digital + Publishing Institute. [Online]. Available: + \url{https://www.mdpi.com/2076-3417/12/22/11565} +\BIBentrySTDinterwordspacing + +\bibitem{reis_developments_2023} +\BIBentryALTinterwordspacing +M.~J. C.~S. Reis, ``\BIBforeignlanguage{en}{Developments of {Computer} {Vision} + and {Image} {Processing}: {Methodologies} and {Applications}},'' + \emph{\BIBforeignlanguage{en}{Future Internet}}, vol.~15, no.~7, p. 233, Jul. + 2023, number: 7 Publisher: Multidisciplinary Digital Publishing Institute. + [Online]. 
Available: \url{https://www.mdpi.com/1999-5903/15/7/233} +\BIBentrySTDinterwordspacing + +\bibitem{ziaja_benchmarking_2021} +\BIBentryALTinterwordspacing +M.~Ziaja, P.~Bosowski, M.~Myller, G.~Gajoch, M.~Gumiela, J.~Protich, K.~Borda, + D.~Jayaraman, R.~Dividino, and J.~Nalepa, + ``\BIBforeignlanguage{en}{Benchmarking {Deep} {Learning} for {On}-{Board} + {Space} {Applications}},'' \emph{\BIBforeignlanguage{en}{Remote Sensing}}, + vol.~13, no.~19, p. 3981, Oct. 2021. [Online]. Available: + \url{https://www.mdpi.com/2072-4292/13/19/3981} +\BIBentrySTDinterwordspacing + \end{thebibliography} diff --git a/Fazeli_Shahroudi-Sepehr-Mastersthesis.tex b/Fazeli_Shahroudi-Sepehr-Mastersthesis.tex index a2f84ba185ee5e6e3b6d06e2e3bb5e2b4bd53862..23b0f9694c2b81e68b6e70800e0382a625d2f7cd 100644 --- a/Fazeli_Shahroudi-Sepehr-Mastersthesis.tex +++ b/Fazeli_Shahroudi-Sepehr-Mastersthesis.tex @@ -20,12 +20,13 @@ \usepackage{textcomp} \usepackage{enumitem} \usepackage{multirow} +\usepackage{cellspace} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newcommand{\thesistitleDE}{Vergleichende Evaluierung von Bildverarbeitungsbibliotheken für industrielle Anwendungen bei Dassault Systems} \newcommand{\thesistitleEN}{Comparative Evaluation of Image Processing Libraries for Industrial Applications at Dassault Systems} \newcommand{\student}{Sepehr Fazeli Shahroudi} \newcommand{\matrnr}{12200627} -\newcommand{\submissiondate}{01.\ Sep 2024} +\newcommand{\submissiondate}{24.\ Mar 2025} \newcommand{\supervisor}{Prof.~Dr.~Schober} \newcommand{\secsupervisor}{Martin Steglich} \newcommand{\faculty}{Angewandte Informatik} diff --git a/chapters/1-Introduction.tex b/chapters/1-Introduction.tex index b4f51b422c091ce5f407097ba2c4aa60642afd62..dc7f45078ddef6deb984d829c0c2f5d0dda44801 100755 --- a/chapters/1-Introduction.tex +++ b/chapters/1-Introduction.tex @@ -2,10 +2,10 @@ \input{sections/Chapter-1-sections/General-Introduction.tex} 
-\input{sections/Chapter-1-sections/Relevance.tex} +% \input{sections/Chapter-1-sections/Relevance.tex} \input{sections/Chapter-1-sections/Aim-and-Objectives.tex} -\input{sections/Chapter-1-sections/Research-Questions.tex} +% \input{sections/Chapter-1-sections/Research-Questions.tex} \input{sections/Chapter-1-sections/Related-Work.tex} \ No newline at end of file diff --git a/chapters/2-Methodology.tex b/chapters/2-Methodology.tex index 7a92f8ce58427b2098b23441e4b757e7bb2c40e9..83a36748824140625782745d8fc035081798a5e4 100644 --- a/chapters/2-Methodology.tex +++ b/chapters/2-Methodology.tex @@ -1,17 +1,97 @@ \chapter{Methodology} -This chapter outlines the methodology used to compare various image processing libraries. The evaluation is grounded in two core performance metrics: \textbf{Image Conversion} and \textbf{pixel iteration}. These metrics provide a basis for comparing the efficiency and responsiveness of different libraries in performing fundamental image processing tasks. In the following sections, we explain why these metrics were chosen, how they are measured, how the results are processed, and the criteria for selecting the libraries under investigation. +This chapter outlines the journey and rationale behind the methodology for comparing various image processing libraries. It explains the choice of performance metrics, describes how the metrics were obtained and processed and details the criteria used to select the libraries under investigation. The aim is to provide an approach that not only yields quantitative insights but also connects with real-world applications. 
-\input{sections/Chapter-2-sections/Performance-Metrics.tex}
-\input{sections/Chapter-2-sections/Rationale.tex}
-\input{sections/Chapter-2-sections/Measurement-Procedure.tex}
-\input{sections/Chapter-2-sections/Data-Analysis.tex}
-\input{sections/Chapter-2-sections/Library-Selection.tex}
+\section{Selection of Libraries for Comparison}
+The choice of libraries for this study was driven by several factors, including functionality, licensing, ease of integration, and performance potential. Most of the image processing libraries provide wrappers or bindings for .NET, the language of choice for these experiments. The search for libraries revealed a wide range of options—from the commercial ImageSharp to various open-source alternatives such as OpenCvSharp, Emgu CV, SkiaSharp, Magick.NET, and others.
+In consideration of the needs of real-world image processing applications, certain technical features were considered essential for the evaluation, such as support for common image formats (JPEG, PNG, BMP, WebP, etc.), mutative operations (e.g., pixel manipulation, color space conversion), and high-level operations (e.g., image composition, filtering). All libraries were evaluated based on their ability to handle these tasks efficiently by inspecting their APIs and documentation. In addition, the licensing model, integration effort, and community support were considered to ensure that the selected libraries were not only technically capable but also practical for real-world applications. The data gathered from this review, including the feature-comparison table created for each library, is available in the appendix (see Chapter~\ref{appendix:evaluation-libraries}). 
+\begin{center}
+    \includegraphics[width=33em]{media/Methodology - selection.png}
+    \captionof{figure}{This figure shows the libraries selected for comparison and the main features of each library or combination of libraries.}
+    \label{fig:selected-libraries}
+\end{center}
+As a result of this research, a clear picture of each library's capabilities was developed, and the most suitable candidates for the performance tests were identified. Consequently, five suggested libraries or combinations of libraries were selected for the comparative evaluation: ImageSharp and Magick.NET as single-library solutions, given their ability to cover both lightweight and complex image processing tasks, and the combinations of OpenCvSharp with SkiaSharp, and Emgu CV with SkiaSharp, as they complement each other in terms of performance and functionality.
+\section{Performance Metrics and Criteria for Comparison}
+Image processing is an integral part of many modern applications, from web services to real-time computer vision systems. The libraries were compared in terms of their performance and practicality using a controlled benchmarking environment. The study focused on two key performance metrics: \textbf{Image Conversion} and \textbf{Pixel Iteration}. These metrics were selected because they represent foundational operations in image processing workflows, forming the building blocks for more complex tasks.
+The decision to focus on image conversion and pixel iteration was based on the need for metrics that could objectively and quantitatively measure core operations while remaining independent of higher-level library-specific features. Image conversion was chosen as it involves loading an image from disk, converting its format, and saving it back. This process mirrors common operations in web applications and desktop software where rapid image display is critical.
+Pixel iteration, on the other hand, was selected to capture the efficiency of low-level image manipulation. 
Many image processing tasks, such as filtering, transformation, and color adjustment, require access to each pixel individually. By measuring the time taken to iterate over all pixels and apply a basic grayscale conversion, a clear indicator of the library’s capability in handling computationally intensive tasks was obtained. These metrics were chosen over alternatives like image saving speed or memory usage because they directly reflect two complementary dimensions: high-level operational overhead and low-level data processing efficiency.
+
+
+\begin{center}
+    \includegraphics[width=23em]{media/Methodology - 2.1.png}
+    \vspace{0em}
+    \captionof{figure}{Metrics for performance comparison of image processing libraries, including image conversion and pixel iteration and what these metrics represent.}
+    \label{fig:performance-metrics}
+    \label{fig:methodology-overview}
+\end{center}
+
+Measurement techniques evolved from initial prototypes and iterative refinements. The importance of warm-up iterations was learned from experiments, as the system needed time to stabilize before meaningful measurements could be taken. This warm-up phase mitigated the effects of just-in-time compilation and caching, ensuring that subsequent iterations reflected steady-state performance rather than the anomalies of system initialization.
+
+\subsection{Defining the Image Conversion Metric}
+
+The image conversion test was designed as follows: a JPEG image file is loaded from disk, converted to PNG format, and then saved. The JPEG and PNG formats were chosen as examples of a common conversion scenario since JPEG is widely used for image storage and PNG is a lossless format suitable for web applications. The entire process is timed from start to finish. This approach involves several steps that are repeated over many iterations. Initially, five warm-up iterations are executed to allow the system to stabilize. 
The warm-up durations are recorded separately and then excluded from the main performance analysis. Once the system is in a steady state, a fixed number of 100 iterations is performed, and the time taken for each one is recorded.
+
+\begin{center}
+    \includegraphics[width=33em]{media/Methodology - 2.2.1.png}
+    \captionof{figure}{Diagram of the Image Conversion Measurement Process, including the phases of warm-up and main iterations and data collection.}
+    \label{fig:image-conversion-measurement}
+\end{center}
+
+The .NET \texttt{Stopwatch} class is used to record the elapsed time for each stage of the process. By repeating this process for a series of iterations—first running several warm-up cycles and then the main iterations—a dataset was generated that could be averaged to produce normalized performance figures.
+
+\subsection{Defining the Pixel Iteration Metric}
+
+The pixel iteration metric targets the efficiency of low-level pixel operations, which are foundational for many advanced image processing techniques. The focus here was on isolating the per-pixel operation, independent of any higher-level image processing abstractions. The measured time provided insights into how efficiently each library handles large amounts of pixel data, a critical factor when scaling to high-resolution images or real-time processing tasks.
+
+\begin{center}
+    \includegraphics[width=33em]{media/Methodology - 2.2.2.png}
+    \captionof{figure}{Diagram of the Pixel Iteration Measurement Process, including the phases of warm-up and main iterations and data collection.}
+    \label{fig:pixel-iteration-measurement}
+\end{center}
+
+
+In this test, the image is loaded into memory, and a nested loop iterates over each pixel. For each pixel, a basic grayscale conversion is applied by computing the average of the red, green, and blue channels, and then rewriting the pixel with the computed grayscale value. 
Similar to the image conversion test, a series of warm-up iterations is run to ensure the system has reached a stable state. After this phase, the main iterations are executed, and the time for each cycle is recorded. The key metric is the average time per iteration, which serves as an indicator of the library's efficiency in handling per-pixel operations. The rationale behind this metric is that many advanced image processing tasks, such as filtering or feature extraction, require efficient pixel-level manipulation.
+
+\subsection{Criteria for Library Comparison}
+
+This comparative evaluation was based on a set of well-defined criteria that reflect both technical performance and practical implementation considerations. The primary criteria were performance (as measured by the two key metrics), functionality (including support for a wide range of image processing tasks), ease of integration (the simplicity of adopting the library within a .NET environment), and licensing. In addition to performance, the integration of BenchmarkDotNet for memory profiling adds another layer to the analysis, allowing the evaluation of trade-offs between speed and memory consumption.
+
+Functionality was assessed by mapping each library’s capabilities against a comprehensive feature set that included image loading, pixel manipulation, format conversion, and high-level operations such as image composition. Ease of integration was evaluated by considering the availability of wrappers or bindings, the clarity of documentation, and the level of community support. Licensing was scrutinized not only in terms of costs but also in terms of the freedoms and restrictions imposed by each license (e.g., Apache 2.0 and MIT licenses versus commercial licensing models). The tables of feature comparison created for each library are available in the appendix (see Chapter~\ref{appendix:evaluation-libraries}). 
+
+\begin{center}
+    \includegraphics[width=19em]{media/Methodology - criteria.png}
+    \captionof{figure}{Graphical representation of the criteria used for library comparison, including performance, functionality, ease of integration, community support, and licensing.}
+    \label{fig:library-selection-criteria}
+\end{center}
+
+Finally, the criteria used for selecting the libraries are grounded in both technical and practical considerations, ensuring that the findings are relevant to a wide range of use cases—from small-scale applications to enterprise-level deployments.
+
+\section{Experimental Setup and Environment}
+
+The tests were conducted in a controlled environment to ensure reproducibility and accuracy. The hardware setup and software environment were kept consistent across all experiments by using the same machine, eliminating variability due to hardware differences. The software environment was configured with a timer, namely the \texttt{Stopwatch} class, which provided millisecond-level precision. Memory profiling was performed using BenchmarkDotNet in separate tests to capture not only execution times but also memory allocations and garbage collection metrics, even though the primary focus remained on processing speed.

+\section{Data Collection and Processing}
+
+The collected data includes the total time taken for the warm-up phase, the average time per iteration during the main phase, and the cumulative time including warm-up. These metrics together provide a comprehensive view for both the image conversion and pixel iteration tests. The simplicity of this test allows for easy replication and clear comparisons among different libraries, which is essential when making performance-based decisions. Each iteration's timing data was recorded using the high-resolution \texttt{Stopwatch} class and stored in memory. Following the completion of each test, the raw data was exported to an Excel file using the EPPlus library. 
This allows for statistical analysis later, such as calculating the mean, median, and standard deviation of the performance times. The Excel files also served as a repository for comparative charts and graphs, which will be used to visually represent the findings.
+
+\begin{center}
+    \includegraphics[width=33em]{media/Methodology - data.png}
+    \captionof{figure}{Graphical representation of the data collection and processing steps, including the use of the \texttt{Stopwatch} class, EPPlus library, and Excel files.}
+    \label{fig:data-collection-processing}
+\end{center}
+
+Additionally, for memory profiling, the BenchmarkDotNet library was used to measure memory consumption during the tests. BenchmarkDotNet provides detailed memory allocation and garbage collection reports in console output, which were captured and stored. After the data was analyzed, the results were aggregated and visualized to provide another layer of insight into the libraries' performance characteristics. These visuals were then used to form the conclusions regarding speed, memory efficiency, and overall suitability for various tasks.
+
+\section{Conclusion}
+
+The methodology adopted in this study is not only a tool for performance measurement but also a process of exploration and discovery. Each step—from defining the metrics to processing the data and selecting the libraries—was a choice aimed at isolating the factors that matter most in image processing.
+
+In conclusion, the methodology provides a robust framework for comparing image processing libraries. It highlights the critical trade-offs between speed, memory usage, ease of integration, and licensing costs. The insights derived from this study offer valuable guidance for developers and researchers alike, paving the way for more efficient and cost-effective image processing solutions in both academic and commercial settings. 
diff --git a/chapters/3-Implementation.tex b/chapters/3-Implementation.tex index 3d5cf6ec9b1453eef5a05adca43972f40148855c..980f37b07fc0a229d49b9c73b4664bc81d4a444c 100644 --- a/chapters/3-Implementation.tex +++ b/chapters/3-Implementation.tex @@ -2,7 +2,7 @@ This chapter details the implementation of a comprehensive benchmarking framework to evaluate several image processing libraries, including ImageSharp, OpenCvSharp paired with SkiaSharp, Emgu CV coupled with Structure.Sketching, and Magick.NET integrated with MagicScaler. The objective was to create an endâ€toâ€end system that not only measures execution times for common image operations but also provides insights into memory usage. -This has been sought to answer key questions regarding the efficiency of image conversion and pixel iteration operations—two fundamental tasks in image processing. The following sections describe the review process, architectural decisions, and technical implementations in the study.extensive study. +This has been sought to answer key questions regarding the efficiency of image conversion and pixel iteration operations—two fundamental tasks in image processing. The following sections describe the review process, architectural decisions, and technical implementations in the study. The full implementation, including source code and benchmarking results, is available at Gitlab repository\footnote{\url{https://mygit.th-deg.de/sf07627/fazeli_shahroudi-sepehr-master-sthesis}}. \input{sections/Chapter-3-sections/System-Architecture.tex} diff --git a/chapters/4-Results.tex b/chapters/4-Results.tex index cded2edf3a7077f4e5e854918b192dcd212d9120..58467d64006a1008d82b1eb52fd82a84342694eb 100644 --- a/chapters/4-Results.tex +++ b/chapters/4-Results.tex @@ -1,12 +1,12 @@ \chapter{Results} -This chapter presents our findings from the benchmarking experiments conducted to evaluate the performance of alternative image processing libraries. 
The results include quantitative data on image conversion and pixel iteration times, as well as memory consumption for each library or combination tested. The data generated will be used to answer the research question and support the hypotheses formulated in the previous chapters. The benchmarking approach consisted of running two primary tests on each library: an image conversion test that measured the time taken to load, process, and save images, and a pixel iteration test that recorded the time required to process every pixel in an image for a grayscale conversion. These experiments were performed in a controlled environment, with warm-up iterations included to reduce the impact of initial overhead. Memory consumption was tracked alongside processing times using BenchmarkDotNet, thereby offering a complete picture of both speed and resource utilization.\\ +This chapter presents findings from the benchmarking experiments conducted to evaluate the performance of alternative image processing libraries. The results include quantitative data on image conversion and pixel iteration times, as well as memory consumption for each library or combination tested. The data generated will be used to answer the research question and support the hypotheses formulated in the previous chapters. The benchmarking approach consisted of running two primary tests on each library: an image conversion test that measured the time taken to load, process, and save images, and a pixel iteration test that recorded the time required to process every pixel in an image for a grayscale conversion. These experiments were performed in a controlled environment, with warm-up iterations included to reduce the impact of initial overhead. 
Memory consumption was tracked alongside processing times using BenchmarkDotNet, thereby offering a complete picture of both speed and resource utilization.\\ %%[PLACEHOLDER: a media summarizing benchmarking methodology] Before discussing the results in detail, it is important to review the benchmarking design. In this study, each library was tested under the same conditions: the same input image was used, a fixed number of warm-up iterations were performed to reduce the effects of just-in-time compilation and caching, and finally, 100 main iterations were executed to ensure reliable statistics. For the image conversion test, the time measured was the duration needed to load a JPEG image, convert it to PNG, and save it back to disk. In the pixel iteration test, the focus was on recording the time required to access and change each pixel for producing a grayscale version of the image. -Memory diagnostics were captured concurrently, with particular attention to allocated memory and garbage collection events. This dual approach ensured that our results were not solely focused on speed but also took into account the resource efficiency of each solution. +Memory diagnostics were captured concurrently, with particular attention to allocated memory and garbage collection events. This dual approach ensured that the results were not solely focused on speed but also took into account the resource efficiency of each solution. 
%%[PLACEHOLDER: a media Diagram of benchmarking process] or reference to it diff --git a/chapters/5-Discussion.tex b/chapters/5-Discussion.tex index a4e62ceda72d3143bd7f0946076b2e069fc6cb25..6a14a521a4cbfce723488477a11781ecf536993f 100644 --- a/chapters/5-Discussion.tex +++ b/chapters/5-Discussion.tex @@ -1,18 +1,18 @@ \chapter{Discussion} -This chapter interprets the results obtained in the benchmarking experiments, placing them in a broader theoretical and practical context.Explores what the results imply about the efficiency, ease of implementation, licensing concerns, and usability of the evaluated image processing libraries. Furthermore, addresses the larger implications of these findings for software development and image processing as a field. +This chapter interprets the results obtained in the benchmarking experiments, placing them in a broader theoretical and practical context. It examines the implications of the performance metrics in terms of computational efficiency, implementation complexity, licensing considerations, and the overall usability of the evaluated image processing libraries. Moreover, this discussion extends to address the broader impact these results have on advancements in software engineering and the evolving field of image processing. \section{Interpreting the Results: Performance vs. Practicality} -The results obtained from our benchmarking study reveal a clear hierarchy of performance among the tested libraries. However, performance alone does not determine the best library for a given use case. The ideal choice depends on a variety of factors, including memory efficiency, ease of integration, licensing constraints, and the specific needs of the application. +The results obtained from the benchmarking study reveal a clear hierarchy of performance among the tested libraries. However, performance alone does not determine the best library for a given use case. 
The ideal choice depends on a variety of factors, including memory efficiency, ease of integration, licensing constraints, and the specific needs of the application. \subsection{Performance Trade-offs and Suitability for Real-World Applications} -From performance standpoint, OpenCvSharp + SkiaSharp and Emgu CV + Structure.Sketching outperform ImageSharp in both image conversion and pixel iteration tasks. However, these libraries require more complex implementations compared to ImageSharp’s user-friendly API. While ImageSharp is slower, it remains a compelling option for projects where ease of use is prioritized over raw speed. SkiaSharp, with its lightweight architecture and cross-platform compatibility, demonstrated remarkable performance in image conversion tasks. It consistently outperformed ImageSharp while consuming significantly less memory. This makes SkiaSharp an ideal choice for applications requiring efficient format conversion without extensive manipulation of individual pixels. Emgu CV, despite its high memory usage, proved to be the fastest option for pixel iteration. This is unsurprising, given its reliance on OpenCV’s highly optimized C++ backend. However, its higher memory footprint may be a drawback for applications running on constrained systems. Magick.NET, on the other hand, performed well in certain tasks but fell short in pixel iteration due to excessive processing times. This suggests that while Magick.NET is a robust tool for high-quality image manipulation and format conversion, it may not be suitable for performance-critical applications requiring low-latency processing. in graph \ref{fig:image-conversion} and \ref{fig:pixel-iteration} the performance comparison of the libraries in image conversion and pixel iteration tasks respectively can be seen. +From a performance standpoint, OpenCvSharp + SkiaSharp and Emgu CV + Structure.Sketching outperform ImageSharp in both image conversion and pixel iteration tasks. 
However, ImageSharp showed better memory efficiency during pixel iteration, making it a viable option for applications with limited memory resources. SkiaSharp, with its lightweight architecture and cross-platform compatibility, demonstrated remarkable performance in image conversion tasks. It consistently outperformed ImageSharp while consuming significantly less memory. This makes SkiaSharp an ideal choice for applications requiring efficient format conversion without extensive manipulation of individual pixels. Emgu CV, despite its high memory usage, proved to be the fastest option for pixel iteration. This is unsurprising, given its reliance on OpenCV’s highly optimized C++ backend. However, its higher memory footprint may be a drawback for applications running on constrained systems. Magick.NET, on the other hand, didn't perform well in both image conversion and pixel iteration tasks. This suggests that while Magick.NET is a robust tool for high-quality image manipulation and format conversion, it may not be suitable for performance-critical applications requiring low-latency processing. In graph \ref{fig:image-conversion} and \ref{fig:pixel-iteration} the performance comparison of the libraries in image conversion and pixel iteration tasks respectively can be seen. \subsection{The Impact of Licensing on Library Selection} -Licensing can be a key consideration in selecting an image processing library. The cost of proprietary solutions can be prohibitive, particularly for small businesses or open-source projects. ImageSharp, while powerful, requires a yearly cost of couple of thousand dollars for commercial use.This cost must be weighed against its performance limitations. Open-source alternatives like OpenCvSharp and SkiaSharp, which are licensed under MIT and Apache 2.0 respectively, offer a compelling alternative by providing high performance at no cost. 
Emgu CV, although based on the open-source OpenCV framework, requires a one-time fee (version specific) of less than thousand dollars, with additional costs for future upgrades. While this is significantly more affordable than ImageSharp, it still represents an investment that must be justified by superior performance. on the other hand,Magick.NET was licensed under Apache 2.0, and provides extensive functionality for free, making it an attractive option for projects that require advanced image processing features but cannot afford proprietary licenses. +Licensing can be a key consideration in selecting an image processing library. The cost of proprietary solutions can be prohibitive, particularly for small businesses or open-source projects. ImageSharp, while powerful, requires a yearly cost of a couple of thousand dollars for commercial use. This cost must be weighed against its performance limitations. Open-source alternatives like OpenCvSharp and SkiaSharp, which are licensed under MIT and Apache 2.0 respectively, offer a compelling alternative by providing high performance at no cost. Emgu CV, although based on the open-source OpenCV framework, requires a one-time fee (version specific) of less than a thousand dollars, with additional costs for future upgrades. While this is significantly more affordable than ImageSharp, it still represents an investment that must be justified by superior performance. On the other hand, Magick.NET is licensed under Apache 2.0, and provides extensive functionality for free, making it an attractive option for projects that require advanced image processing features but cannot afford proprietary licenses. \begin{longtable} {|>{\raggedright\arraybackslash}p{0.30\textwidth}|>{\raggedright\arraybackslash}p{0.20\textwidth}|>{\raggedright\arraybackslash}p{0.20\textwidth}|>{\raggedright\arraybackslash}p{0.20\textwidth}|} @@ -40,14 +40,13 @@ Licensing can be a key consideration in selecting an image processing library. 
T \section{Strengths and Weaknesses of the Different Libraries} -ImageSharp’s biggest advantage is its simple API and pure .NET implementation. It is easy to integrate and requires minimal setup. However, our benchmarks show that it lags behind other libraries in performance. Its relatively high memory efficiency during pixel iteration is a plus, but for tasks requiring fast image conversion or pixel-level modifications, other options are preferable. -OpenCvSharp+SkiaSharp: High Performance, Moderate Complexity.This combination provides the best balance between speed and memory efficiency. OpenCvSharp offers the power of OpenCV’s optimized image processing, while SkiaSharp enhances its rendering and format conversion capabilities. However, using these libraries effectively requires familiarity with both OpenCV and SkiaSharp APIs, making them less beginner-friendly than ImageSharp. Emgu CV’s performance in pixel iteration tasks is unmatched, making it ideal for applications involving real-time image analysis, such as AI-driven image recognition. However, its high memory consumption may pose a problem for resource-limited environments. Structure.Sketching complements Emgu CV by providing efficient image creation and drawing capabilities, making this combination well-suited for applications requiring both processing speed and graphical rendering. In contrast,Magick.NET excels in high-quality image manipulation and resampling but falls short in raw speed. The high processing times recorded for pixel iteration indicate that Magick.NET is best suited for batch processing or scenarios where quality takes precedence over execution time. And MagickScaler, provides advanced image scaling capabilities, making it a valuable tool for applications requiring precise image resizing and enhancement. +ImageSharp’s biggest advantage is its simple API and pure .NET implementation. It is easy to integrate and requires minimal setup. 
However, benchmarks show that it lags behind other libraries in performance. Its relatively high memory efficiency during pixel iteration is a plus, but for tasks requiring fast image conversion or pixel-level modifications, other options are preferable. The combination of OpenCvSharp and SkiaSharp offers a mix of high performance and moderate complexity. This combination provides the best balance between speed and memory efficiency. OpenCvSharp offers the power of OpenCV’s optimized image processing, while SkiaSharp enhances its rendering and format conversion capabilities. However, using these libraries effectively requires familiarity with both OpenCV and SkiaSharp APIs, making them less beginner-friendly than ImageSharp. Emgu CV’s performance in pixel iteration tasks is unmatched, making it ideal for applications involving real-time image analysis, such as AI-driven image recognition. However, its high memory consumption may pose a problem for resource-limited environments. Structure.Sketching complements Emgu CV by providing efficient image creation and drawing capabilities, making this combination well-suited for applications requiring both processing speed and graphical rendering. In contrast, Magick.NET excels in high-quality image manipulation and resampling but falls short in raw speed. The high processing times recorded for pixel iteration indicate that Magick.NET is best suited for batch processing or scenarios where quality takes precedence over execution time. Additionally, MagicScaler provides advanced image scaling capabilities, making it a valuable tool for applications requiring precise image resizing and enhancement. Overall, there is no single library that is best for all use cases. The optimal choice depends on the application’s specific requirements. If ease of implementation and maintainability are priorities, ImageSharp remains a solid choice despite its performance drawbacks. 
For performance-intensive applications where raw speed is essential, OpenCvSharp+SkiaSharp or Emgu CV+Structure.Sketching are superior choices. \vspace{1em} \includegraphics[width=\textwidth]{media/usecase.png} -\captionof{figure}{Mapping different libraries to their ideal use cases} +\captionof{figure}{Diagram showing the ideal use cases for each library or library combination based on this study's findings.} \label{fig:usecase} \section{Considerations for Future Research} @@ -58,9 +57,13 @@ Moreover, the balance between speed and memory efficiency is a recurring challen Future research could explore the following areas to further enhance the capabilities of image processing libraries: -\textbf{Expanding the Scope of Benchmarking:} While our study focused on image conversion and pixel iteration, real-world applications often require additional operations such as filtering, blending, and object detection. Future research could expand the benchmarking scope to include these tasks, providing a more comprehensive evaluation of each library’s capabilities. +\textbf{Expanding the Scope of Benchmarking:} While the study focused on image conversion and pixel iteration, real-world applications often require additional operations such as filtering, blending, and object detection. Future research could expand the benchmarking scope to include these tasks, providing a more comprehensive evaluation of each library’s capabilities. -\textbf{GPU Acceleration and Parallel Processing:} One limitation of our study is that all benchmarks were conducted on a CPU. Many modern image processing tasks benefit from GPU acceleration, which libraries like OpenCV support. Investigating the performance of these libraries on GPU-accelerated hardware could yield valuable insights into their scalability and efficiency. +\textbf{Cross-Language Compatibility:} Many image processing libraries are available in multiple programming languages, such as Python, Java, and C++. 
Investigating the performance of these libraries across different languages could provide valuable insights into the impact of language-specific optimizations on computational efficiency. + +\textbf{Format-Specific Performance:} Different image formats have unique compression algorithms and color spaces, which can impact the performance of image processing libraries. Future research could investigate how each library performs with specific formats, such as TIFF, BMP, or PNG, to identify any format-specific optimizations or bottlenecks. + +\textbf{GPU Acceleration and Parallel Processing:} One limitation of this study is that all benchmarks were conducted on a CPU. Many modern image processing tasks benefit from GPU acceleration, which libraries like OpenCV support. Investigating the performance of these libraries on GPU-accelerated hardware could yield valuable insights into their scalability and efficiency. \textbf{Cloud-Based Processing:} With the growing adoption of cloud computing, it would be beneficial to evaluate how these libraries perform in cloud-based environments such as AWS Lambda or Azure Functions. Factors such as cold start times, scalability, and integration with cloud-based storage solutions would be critical considerations for enterprise applications. 
diff --git a/chapters/Appendices.tex b/chapters/Appendices.tex index 02d6cd54e218b0aa17265953abba7653c23cf676..a6a2e61e10a8a09bc0a6fc8b9d877891934f903b 100755 --- a/chapters/Appendices.tex +++ b/chapters/Appendices.tex @@ -1,5 +1,9 @@ -\chapter{Appendices} -\cite{ferreira_generic_2024} -\input{chapters/Appendices/appendix_a.tex} -\input{chapters/Appendices/appendix_b.tex} -\input{chapters/Appendices/appendix_c.tex} \ No newline at end of file +\chapter{Appendix} +\label{ch:appendix} + +\subsubsection{Evaluation of Image Processing Libraries} +\label{appendix:evaluation-libraries} + +This appendix provides a detailed analysis of various image processing libraries considered for the implementation phase of this thesis. Each library is evaluated based on key technical criteria, licensing considerations, and integration requirements. + +\input{outdated/evaluation-of-alternatives} \ No newline at end of file diff --git a/media/Methodology - 2.1.png b/media/Methodology - 2.1.png new file mode 100644 index 0000000000000000000000000000000000000000..559da832fa3efb8e67548b5fbead1919ab0b95ee Binary files /dev/null and b/media/Methodology - 2.1.png differ diff --git a/media/Methodology - 2.2.1.png b/media/Methodology - 2.2.1.png new file mode 100644 index 0000000000000000000000000000000000000000..2333a4466847c94de01abc490caf3be12716eeef Binary files /dev/null and b/media/Methodology - 2.2.1.png differ diff --git a/media/Methodology - 2.2.2.png b/media/Methodology - 2.2.2.png new file mode 100644 index 0000000000000000000000000000000000000000..7e51ae08e5a4c4393bc4ed0f3103dad0e9159c5f Binary files /dev/null and b/media/Methodology - 2.2.2.png differ diff --git a/media/Methodology - criteria.png b/media/Methodology - criteria.png new file mode 100644 index 0000000000000000000000000000000000000000..fd63f9b658953d2914be59af828605e69237cf12 Binary files /dev/null and b/media/Methodology - criteria.png differ diff --git a/media/Methodology - data.png b/media/Methodology - data.png 
new file mode 100644 index 0000000000000000000000000000000000000000..400550652641023c43f3b3ae11522d33f46a465b Binary files /dev/null and b/media/Methodology - data.png differ diff --git a/media/Methodology - selection.png b/media/Methodology - selection.png new file mode 100644 index 0000000000000000000000000000000000000000..ff529dd29806aa0d7bc3f184380ea071e9b937be Binary files /dev/null and b/media/Methodology - selection.png differ diff --git a/media/log_1.png b/media/log_1.png index a20192457906a80c72671b727271fbdd019264bd..087bd570b8a69ade29327455f4bb1da1743474a9 100644 Binary files a/media/log_1.png and b/media/log_1.png differ diff --git a/media/log_2.png b/media/log_2.png index 13e0227b42a7f1297b686a53151b9cf77a125c31..339f01b095d068365aa58473730ec85821d0ec99 100644 Binary files a/media/log_2.png and b/media/log_2.png differ diff --git a/outdated/evaluation-of-alternatives.tex b/outdated/evaluation-of-alternatives.tex index d25fa5a0576ccf161310a9d4070a13a2c944d964..e0360221345bef81984b27ff1d7ac804e51b131c 100755 --- a/outdated/evaluation-of-alternatives.tex +++ b/outdated/evaluation-of-alternatives.tex @@ -1,5 +1,5 @@ \hypertarget{openimageio-oiio}{% -\subsection{1. OpenImageIO (OIIO)}\label{openimageio-oiio}} +\subsubsection{1. OpenImageIO (OIIO)}\label{openimageio-oiio}} \begin{itemize} \item \textbf{Type}: Open-source @@ -15,7 +15,7 @@ \input{Tables/OpenImageIO_table.tex} \hypertarget{skiasharp}{% -\subsection{2.~SkiaSharp}\label{skiasharp}} +\subsubsection{2.~SkiaSharp}\label{skiasharp}} \begin{itemize} \item @@ -40,7 +40,7 @@ \input{Tables/SkiaSharp_table.tex} \hypertarget{magick.net}{% -\subsection{3. Magick.NET}\label{magick.net}} +\subsubsection{3. Magick.NET}\label{magick.net}} \begin{itemize} \item @@ -64,7 +64,7 @@ \input{Tables/MagickNET_table.tex} \hypertarget{emgu-cv}{% -\subsection{4. Emgu CV}\label{emgu-cv}} +\subsubsection{4. 
Emgu CV}\label{emgu-cv}} \begin{itemize} \item @@ -90,7 +90,7 @@ \input{Tables/EmguCV_table.tex} \hypertarget{magicscaler}{% -\subsection{5. MagicScaler}\label{magicscaler}} +\subsubsection{5. MagicScaler}\label{magicscaler}} \begin{itemize} \item @@ -114,7 +114,7 @@ \input{Tables/MagicScaler_table.tex} \hypertarget{simpleitk}{% -\subsection{6. SimpleITK}\label{simpleitk}} +\subsubsection{6. SimpleITK}\label{simpleitk}} \begin{itemize} \item @@ -139,7 +139,7 @@ \input{Tables/SimpleITK_table.tex} \hypertarget{structure.sketching}{% -\subsection{7. Structure.Sketching}\label{structure.sketching}} +\subsubsection{7. Structure.Sketching}\label{structure.sketching}} \begin{itemize} \item @@ -165,7 +165,7 @@ \input{Tables/Structure.Sketching_table.tex} \hypertarget{opencvsharp}{% -\subsection{8. OpenCvSharp}\label{opencvsharp}} +\subsubsection{8. OpenCvSharp}\label{opencvsharp}} \begin{itemize} \item @@ -188,7 +188,7 @@ \input{Tables/OpenCvSharp_table.tex} \hypertarget{microsoft.maui.graphics}{% -\subsection{9. Microsoft.Maui.Graphics}\label{microsoft.maui.graphics}} +\subsubsection{9. Microsoft.Maui.Graphics}\label{microsoft.maui.graphics}} \begin{itemize} \item @@ -216,7 +216,7 @@ now read-only.} \input{Tables/Microsoft.Maui.Graphics_table.tex} \hypertarget{leadtools}{% -\subsection{10.~LeadTools}\label{leadtools}} +\subsubsection{10.~LeadTools}\label{leadtools}} \begin{itemize} \item diff --git a/sections/Chapter-1-sections/Aim-and-Objectives.tex b/sections/Chapter-1-sections/Aim-and-Objectives.tex index 471684514a87f9ec44c38f8e52134d15b836f457..8611495a6bb758957e7e2d0379d037367b96be69 100644 --- a/sections/Chapter-1-sections/Aim-and-Objectives.tex +++ b/sections/Chapter-1-sections/Aim-and-Objectives.tex @@ -1,6 +1,6 @@ \section{ Aim of the Study and Its Implications for Selecting an Image Processing Tool} -This study was initiated to compare a broad range of image processing libraries based on performance, functionality, and ease of integration. 
Although the investigation was partly motivated by considerations around the ImageSharp license, the primary goal is to establish a general framework for evaluating different tools in the field. By assessing key metrics such as image conversion speed, pixel iteration efficiency, memory consumption, and development effort, the research aims to provide a balanced perspective that assists developers, engineers, and decision-makers in selecting the most appropriate image processing tool for their projects. +The purpose of this study was to compare the performance, functionality, and ease of integration of a wide range of image processing libraries. The primary objective is to establish a general framework for evaluating different tools in the field. As part of this research, key metrics such as conversion speed, pixel iteration efficiency, memory consumption, and development effort will be evaluated in order to provide developers, engineers, and decision-makers with a balanced viewpoint. \subsection{ Research Goals and Objectives} @@ -17,26 +17,26 @@ At its core, the study sought to answer the question: “Which image processing Beyond performance metrics, the study was designed to consider the broader context of software integration. Factors such as ease of implementation, the learning curve for developers, compatibility with existing systems, and community support were all taken into account. This holistic view means that the research is not just about raw performance numbers but also about the practicalities of deploying and maintaining these tools in production environments. \end{enumerate} -\subsection{ Methodology and Benchmarking} +% \subsection{ Methodology and Benchmarking} -To achieve these objectives, the study adopted a multi-faceted methodological approach that combined qualitative assessments with quantitative benchmarks. 
The research was structured into several key phases: +% To achieve these objectives, the study adopted a multi-faceted methodological approach that combined qualitative assessments with quantitative benchmarks. The research was structured into several key phases: -\subsubsection{ Establishing Functional Criteria} +% \subsubsection{ Establishing Functional Criteria} -The first step was to outline the core functionalities required from an image processing library. These functionalities included image loading and creation, pixel-level manipulation, image transformation (such as cropping, resizing, and color conversion), and the encoding\\decoding of various image formats. Each library was then evaluated on how well it supports these functions. For example, while ImageSharp provides an elegant and fluent API for chaining operations like cloning, mutating, and resizing images, other tools like Emgu CV or SkiaSharp may offer advantages in raw performance or specific tasks such as advanced 2D rendering. +% The first step was to outline the core functionalities required from an image processing library. These functionalities included image loading and creation, pixel-level manipulation, image transformation (such as cropping, resizing, and color conversion), and the encoding\\decoding of various image formats. Each library was then evaluated on how well it supports these functions. For example, while ImageSharp provides an elegant and fluent API for chaining operations like cloning, mutating, and resizing images, other tools like Emgu CV or SkiaSharp may offer advantages in raw performance or specific tasks such as advanced 2D rendering. -\subsubsection{ Performance and Memory Benchmarking} +% \subsubsection{ Performance and Memory Benchmarking} -Quantitative performance metrics were a central component of the study. Two key tests were developed: +% Quantitative performance metrics were a central component of the study. 
Two key tests were developed: -\begin{itemize} - \item \textbf{Image Conversion Test:} This test measured the time taken to load an image, convert it to a different format, and save the result. It simulates a typical workflow in many image processing applications and serves as a proxy for real-world performance. The results indicated significant differences between libraries. For instance, SkiaSharp showed excellent performance in image conversion tasks with both the fastest conversion times and minimal memory allocation, making it an attractive option for performance-critical applications. - \item \textbf{Pixel Iteration Test:} Many image processing tasks require iterating over each pixel—for example, when applying filters or performing color adjustments. The study evaluated how long each library took to perform such operations and the associated memory footprint. Although some tools demonstrated faster pixel iteration times, the overall memory consumption varied widely, highlighting the trade-off between speed and resource usage. -\end{itemize} +% \begin{itemize} +% \item \textbf{Image Conversion Test:} This test measured the time taken to load an image, convert it to a different format, and save the result. It simulates a typical workflow in many image processing applications and serves as a proxy for real-world performance. The results indicated significant differences between libraries. For instance, SkiaSharp showed excellent performance in image conversion tasks with both the fastest conversion times and minimal memory allocation, making it an attractive option for performance-critical applications. +% \item \textbf{Pixel Iteration Test:} Many image processing tasks require iterating over each pixel—for example, when applying filters or performing color adjustments. The study evaluated how long each library took to perform such operations and the associated memory footprint. 
Although some tools demonstrated faster pixel iteration times, the overall memory consumption varied widely, highlighting the trade-off between speed and resource usage. +% \end{itemize} -\subsubsection{ Estimation of Development Effort} +% \subsubsection{ Estimation of Development Effort} -Recognizing that performance is not the sole criterion for tool selection, the study also estimated the development effort required to integrate each library into an existing application. This included considerations such as the ease of understanding the API, the availability of documentation and community support, and the potential need for custom code to bridge functionality gaps. For example, while some libraries offered powerful processing capabilities, they might require significant custom development to integrate seamlessly into a .NET environment or to support specific image formats. +% Recognizing that performance is not the sole criterion for tool selection, the study also estimated the development effort required to integrate each library into an existing application. This included considerations such as the ease of understanding the API, the availability of documentation and community support, and the potential need for custom code to bridge functionality gaps. For example, while some libraries offered powerful processing capabilities, they might require significant custom development to integrate seamlessly into a .NET environment or to support specific image formats. \subsection{ Practical Implications for Tool Selection} @@ -52,16 +52,16 @@ One of the standout contributions of the study is its ability to help users make \subsubsection{ Extending Beyond Cost Savings} -While cost savings—such as the €5000 per year saving associated with avoiding ImageSharp’s licensing fees—are certainly a factor, the study underscores that financial considerations should not be the sole driver of decision-making. 
The true value of an image processing tool lies in its ability to meet specific technical and operational requirements. By providing a detailed comparison of several alternatives, the research emphasizes that factors like ease of integration, scalability, and overall performance are equally, if not more, important. This holistic approach helps organizations avoid the pitfall of selecting a tool based solely on its cost. +While cost savings are certainly a factor, the study underscores that financial considerations should not be the sole driver of decision-making. The true value of an image processing tool lies in its ability to meet specific technical and operational requirements. By providing a detailed comparison of several alternatives, the research emphasizes that factors like ease of integration, scalability, and overall performance are equally, if not more, important. This holistic approach helps organizations avoid the pitfall of selecting a tool based solely on its cost. \subsubsection{ Guiding Future Developments and Integrations} The insights gained from the study are not only applicable to current technology choices but also serve as a guide for future developments in image processing. The detailed benchmarks and performance analyses can inform future projects, helping developers understand where improvements can be made or which features are most critical. Additionally, the study’s approach to evaluating development effort and integration challenges provides a roadmap for how future research can build on these findings to further refine the selection process. -\subsection{ Conclusion} +% \subsection{ Conclusion} -In conclusion, this research offers a detailed and methodical framework for comparing a diverse range of image processing libraries. 
By focusing on critical performance indicators—such as image conversion efficiency, pixel manipulation speed, and memory usage—alongside practical considerations for integration, the study provides actionable insights that transcend mere numerical comparisons. This comprehensive evaluation enables practitioners to appreciate the subtle differences and inherent trade-offs between various tools, ensuring that the selected library meets specific operational requirements and supports long-term scalability. +% In conclusion, this research offers a detailed and methodical framework for comparing a diverse range of image processing libraries. By focusing on critical performance indicators—such as image conversion efficiency, pixel manipulation speed, and memory usage—alongside practical considerations for integration, the study provides actionable insights that transcend mere numerical comparisons. This comprehensive evaluation enables practitioners to appreciate the subtle differences and inherent trade-offs between various tools, ensuring that the selected library meets specific operational requirements and supports long-term scalability. -The findings underscore the importance of adopting a multi-dimensional evaluation approach. Rather than basing decisions solely on isolated performance metrics, the research illustrates how a balanced view—integrating both technical capabilities and practical implementation challenges—can lead to more robust and adaptable solutions. This perspective is essential in a field where evolving technologies and shifting project demands necessitate both flexibility and precision in tool selection. +% The findings underscore the importance of adopting a multi-dimensional evaluation approach. Rather than basing decisions solely on isolated performance metrics, the research illustrates how a balanced view—integrating both technical capabilities and practical implementation challenges—can lead to more robust and adaptable solutions. 
This perspective is essential in a field where evolving technologies and shifting project demands necessitate both flexibility and precision in tool selection. -Ultimately, the insights derived from this investigation empower developers, engineers, and decision-makers to navigate the complex landscape of image processing technologies with confidence. By providing a thorough, balanced comparison of various libraries, the study serves as a valuable resource for making informed decisions that address current needs while also laying a strong foundation for future innovation and development in image processing. \ No newline at end of file +% Ultimately, the insights derived from this investigation empower developers, engineers, and decision-makers to navigate the complex landscape of image processing technologies with confidence. By providing a thorough, balanced comparison of various libraries, the study serves as a valuable resource for making informed decisions that address current needs while also laying a strong foundation for future innovation and development in image processing. \ No newline at end of file diff --git a/sections/Chapter-1-sections/General-Introduction.tex b/sections/Chapter-1-sections/General-Introduction.tex index c7216f6d3809468f39e7732f9ef8324c6319a131..b63168b2a3d6147147e20b98ecd5e64a187f90a8 100644 --- a/sections/Chapter-1-sections/General-Introduction.tex +++ b/sections/Chapter-1-sections/General-Introduction.tex @@ -1,23 +1,33 @@ \section{ The Significance of Image Processing in Modern Industry} -Digital image processing has emerged as a cornerstone of modern industrial applications, revolutionizing the way industries operate and innovate. From quality control in manufacturing to advanced simulations in aerospace, the ability to process and analyze images digitally has unlocked unprecedented efficiencies and capabilities. 
This field, which involves the manipulation and analysis of images using algorithms, has evolved significantly over the past few decades, driven by advancements in computing power, algorithm development, and the proliferation of digital imaging devices. +Digital image processing has emerged as a cornerstone of modern industrial applications, revolutionizing the way industries operate and innovate. From quality control in manufacturing to advanced simulations in aerospace, the ability to process and analyze images digitally has unlocked unprecedented efficiencies and capabilities. This field, which involves the manipulation and analysis of images using algorithms, has evolved significantly over the past few decades, driven by advancements in computing power, algorithm development, and the proliferation of digital imaging devices \cite{gonzalez_digital_2008-1,jain_fundamentals_1989-1}. -The significance of digital image processing in industrial applications cannot be overstated. In manufacturing, for instance, image processing is integral to quality assurance processes, where it is used to detect defects, measure product dimensions, and ensure compliance with stringent standards. This capability not only enhances product quality but also reduces waste and operational costs. In the automotive industry, image processing is pivotal in the development of autonomous vehicles, where it aids in object detection, lane departure warnings, and pedestrian recognition. Similarly, in the healthcare sector, digital image processing is used in medical imaging technologies such as MRI and CT scans, enabling more accurate diagnoses and treatment planning. +The significance of digital image processing in industrial applications cannot be overstated. In manufacturing, for instance, image processing is integral to quality assurance processes, where it is used to detect defects, measure product dimensions, and ensure compliance with stringent standards. 
This capability not only enhances product quality but also reduces waste and operational costs. In the automotive industry, image processing is pivotal in the development of autonomous vehicles, where it aids in object detection, lane departure warnings, and pedestrian recognition. Similarly, in the healthcare sector, digital image processing is used in medical imaging technologies such as MRI and CT scans, enabling more accurate diagnoses and treatment planning \cite{russ_image_2016,szeliski_introduction_2022}. -The evolution of digital image processing has been marked by several key developments. Initially, the field was limited by the computational resources available, with early applications focusing on basic image enhancement and restoration. However, the advent of powerful processors and the development of sophisticated algorithms have expanded the scope of image processing to include complex tasks such as pattern recognition, 3D reconstruction, and real-time image analysis. The integration of artificial intelligence and machine learning has further propelled the field, allowing for the development of intelligent systems capable of learning from data and improving over time. +The evolution of digital image processing has been marked by several key developments. Initially, the field was limited by the computational resources available, with early applications focusing on basic image enhancement and restoration. However, the advent of powerful processors and the development of sophisticated algorithms have expanded the scope of image processing to include complex tasks such as pattern recognition, 3D reconstruction, and real-time image analysis. The integration of artificial intelligence and machine learning has further propelled the field, allowing for the development of intelligent systems capable of learning from data and improving over time \cite{gonzalez_digital_2008-1,szeliski_introduction_2022,goodfellow_deep_2016}. 
-For industries like Dassault Systems, which operates at the forefront of aerospace, defense, and industrial engineering, a comparative study of image processing libraries is crucial. These libraries, which provide pre-built functions and tools for image analysis, vary significantly in terms of performance, ease of use, and functionality. Selecting the right library can have a profound impact on the efficiency and effectiveness of image processing tasks. For instance, libraries such as OpenCV, TensorFlow, and MATLAB offer different strengths and weaknesses, and understanding these differences is essential for optimizing industrial applications. +For industries like Dassault Systèmes, which operates at the forefront of aerospace, defense, and industrial engineering, a comparative study of image processing libraries is crucial. These libraries, which provide pre-built functions and tools for image analysis, vary significantly in terms of performance, ease of use, and functionality. Selecting the right library can have a profound impact on the efficiency and effectiveness of image processing tasks. For instance, libraries such as OpenCV, ImageMagick, and ImageSharp offer different strengths and weaknesses, and understanding these differences is essential for optimizing industrial applications \cite{bradski_learning_2008}. A comparative study of these libraries not only aids in selecting the most suitable tools for specific tasks but also highlights areas for potential improvement and innovation. By analyzing the performance of different libraries in various scenarios, industries can identify gaps in current technologies and drive the development of new solutions that better meet their needs. Moreover, such studies contribute to the broader field of digital image processing by providing insights into best practices and emerging trends. % References + % 1. Gonzalez, R. C., & Woods, R. E. (2008). Digital Image Processing. Pearson Prentice Hall. 
+% \cite{gonzalez_digital_2008} +% \cite{gonzalez_digital_2008-1} % 2. Jain, A. K. (1989). Fundamentals of Digital Image Processing. Prentice Hall. +% \cite{jain_fundamentals_1989} +% \cite{jain_fundamentals_1989-1} % 3. Bradski, G., & Kaehler, A. (2008). Learning OpenCV: Computer Vision with the OpenCV Library. O'Reilly Media. -% 4. Russ, J. C. (2011). The Image Processing Handbook. CRC Press. +% \cite{bradski_learning_2008} +% 4. Russ, J. C. (2016). The Image Processing Handbook. CRC Press. +% \cite{russ_image_2016} % 5. Goodfellow, I., Bengio, Y., & Courville, A. (2016). Deep Learning. MIT Press. -% 6. Szeliski, R. (2010). Computer Vision: Algorithms and Applications. Springer. +% \cite{goodfellow_deep_2016} +% 6. Szeliski, R. (2022). Computer Vision: Algorithms and Applications. Springer. +% \cite{szeliski_introduction_2022} +% \cite{szeliski_image_2022} \subsection{Evolution and Impact of Digital Image Processing} @@ -25,32 +35,37 @@ Digital image processing has evolved significantly since its inception, transfor \subsubsection{Early Beginnings} -The origins of digital image processing can be traced back to the 1920s and 1930s with the development of television technology, which laid the groundwork for electronic image capture and transmission. However, it wasn't until the 1960s that digital image processing began to take shape as a distinct field. The launch of the first digital computers provided the necessary computational power to process images digitally. During this period, NASA played a pivotal role by using digital image processing to enhance images of the moon's surface captured by the Ranger 7 spacecraft in 1964. This marked one of the first significant applications of digital image processing, demonstrating its potential for scientific and exploratory purposes. 
+The origins of digital image processing can be traced back to the 1920s and 1930s with the development of television technology, which laid the groundwork for electronic image capture and transmission. However, it wasn't until the 1960s that digital image processing began to take shape as a distinct field. The launch of the first digital computers provided the necessary computational power to process images digitally. During this period, NASA played a pivotal role by using digital image processing to enhance images of the moon's surface captured by the Ranger 7 spacecraft in 1964. This marked one of the first significant applications of digital image processing, demonstrating its potential for scientific and exploratory purposes \cite{cooley_algorithm_1965}. \subsubsection{The 1970s and 1980s: Theoretical Foundations and Practical Applications} -The 1970s saw the establishment of theoretical foundations for digital image processing. Researchers developed algorithms for image enhancement, restoration, and compression. The Fast Fourier Transform (FFT), introduced by Cooley and Tukey in 1965, became a fundamental tool for image processing, enabling efficient computation of image transformations. This period also witnessed the development of the first commercial applications, such as medical imaging systems. The introduction of Computed Tomography (CT) in 1972 revolutionized medical diagnostics by providing detailed cross-sectional images of the human body, showcasing the life-saving potential of digital image processing. +The 1970s saw the establishment of theoretical foundations for digital image processing. Researchers developed algorithms for image enhancement, restoration, and compression. The Fast Fourier Transform (FFT), introduced by Cooley and Tukey in 1965, became a fundamental tool for image processing, enabling efficient computation of image transformations. 
This period also witnessed the development of the first commercial applications, such as medical imaging systems. The introduction of Computed Tomography (CT) in 1972 revolutionized medical diagnostics by providing detailed cross-sectional images of the human body, showcasing the life-saving potential of digital image processing \cite{cooley_algorithm_1965,hounsfield_computerized_1973}. \subsubsection{The 1990s: The Rise of Computer Vision} -The 1990s marked a significant shift towards computer vision, a subfield of digital image processing focused on enabling machines to interpret visual data. This era saw the development of algorithms for object recognition, motion detection, and 3D reconstruction. The introduction of the JPEG standard in 1992 facilitated the widespread adoption of digital images by providing an efficient method for image compression, crucial for the burgeoning internet era. The decade also saw advancements in facial recognition technology, which laid the groundwork for future applications in security and personal identification. +The 1990s marked a significant shift towards computer vision, a subfield of digital image processing focused on enabling machines to interpret visual data. This era saw the development of algorithms for object recognition, motion detection, and 3D reconstruction. The introduction of the JPEG standard in 1992 facilitated the widespread adoption of digital images by providing an efficient method for image compression, crucial for the burgeoning internet era. The decade also saw advancements in facial recognition technology, which laid the groundwork for future applications in security and personal identification \cite{lecun_deep_2015}. \subsubsection{The 2000s: Machine Learning and Image Processing} -The 2000s witnessed the integration of machine learning techniques with digital image processing, leading to significant improvements in image analysis and interpretation. 
The development of Support Vector Machines (SVM) and neural networks enabled more accurate image classification and pattern recognition. This period also saw the emergence of digital cameras and smartphones, which democratized image capture and sharing, further driving the demand for advanced image processing techniques. +The 2000s witnessed the integration of machine learning techniques with digital image processing, leading to significant improvements in image analysis and interpretation. The development of Support Vector Machines (SVM) and neural networks enabled more accurate image classification and pattern recognition. This period also saw the emergence of digital cameras and smartphones, which democratized image capture and sharing, further driving the demand for advanced image processing techniques~\cite{lecun_deep_2015}. \subsubsection{The 2010s to Present: Deep Learning and Industrial Innovation} -The advent of deep learning in the 2010s revolutionized digital image processing. Convolutional Neural Networks (CNNs), popularized by the success of AlexNet in the ImageNet competition in 2012, dramatically improved the accuracy of image recognition tasks. This breakthrough spurred innovation across various industries. In healthcare, deep learning algorithms are now used for early detection of diseases through medical imaging, improving patient outcomes. +The advent of deep learning in the 2010s revolutionized digital image processing. Convolutional Neural Networks (CNNs), popularized by the success of AlexNet in the ImageNet competition in 2012, dramatically improved the accuracy of image recognition tasks. This breakthrough spurred innovation across various industries. In healthcare, deep learning algorithms are now used for early detection of diseases through medical imaging, improving patient outcomes. 
In the automotive industry, image processing is a critical component of autonomous vehicle systems, enabling real-time object detection and navigation \cite{hinton_improving_2012,lecun_deep_2015}. In recent years, digital image processing has expanded into areas such as augmented reality (AR) and virtual reality (VR), enhancing user experiences in gaming, education, and training. The integration of image processing with artificial intelligence continues to drive innovation, with applications in fields such as agriculture, where drones equipped with image processing capabilities monitor crop health and optimize yields. % References % 1. Cooley, J. W., & Tukey, J. W. (1965). An algorithm for the machine calculation of complex Fourier series. Mathematics of Computation, 19(90), 297-301. +% \cite{cooley_algorithm_1965} +% \cite{cooley_algorithm_nodate} % 2. Hounsfield, G. N. (1973). Computerized transverse axial scanning (tomography): Part 1. Description of system. British Journal of Radiology, 46(552), 1016-1022. +% \cite{hounsfield_computerized_1973} % 3. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436-444. +% \cite{lecun_deep_2015} % 4. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, 25, 1097-1105. +% \cite{hinton_improving_2012} \subsection{Current Applications of Image Processing in Industry} @@ -58,48 +73,59 @@ Image processing, a critical component of computer vision, has become an indispe \subsubsection{Manufacturing and Quality Control} -In the manufacturing industry, image processing is pivotal for quality control and defect detection. Automated visual inspection systems utilize high-resolution cameras and sophisticated algorithms to detect defects in products at a speed and accuracy unattainable by human inspectors. 
For instance, in semiconductor manufacturing, image processing is used to inspect wafers for defects, ensuring that only flawless products proceed to the next production stage. This not only enhances product quality but also reduces waste and operational costs. A study by Zhang et al. (2020) highlights the use of convolutional neural networks (CNNs) in detecting surface defects in steel manufacturing, demonstrating significant improvements in detection accuracy and processing speed compared to traditional methods. +In the manufacturing industry, image processing is pivotal for quality control and defect detection. Automated visual inspection systems utilize high-resolution cameras and sophisticated algorithms to detect defects in products at a speed and accuracy unattainable by human inspectors. For instance, in semiconductor manufacturing, image processing is used to inspect wafers for defects, ensuring that only flawless products proceed to the next production stage. This not only enhances product quality but also reduces waste and operational costs. A study by Zhang et al. (2023) \cite{zhang_efficient_2023} highlights the use of convolutional neural networks (CNNs) in detecting surface defects in steel manufacturing, demonstrating significant improvements in detection accuracy and processing speed compared to traditional methods. \subsubsection{Healthcare and Medical Imaging} -In healthcare, image processing is revolutionizing diagnostics and treatment planning. Techniques such as MRI, CT scans, and X-rays rely heavily on image processing to enhance image quality and extract meaningful information. For example, in radiology, image processing algorithms help in the early detection of diseases like cancer by improving the clarity and contrast of medical images, allowing for more accurate diagnoses. A research paper by Litjens et al. 
(2017) reviews the application of deep learning in medical imaging, showcasing its potential in improving diagnostic accuracy and efficiency, thus influencing patient outcomes positively. +In healthcare, image processing is revolutionizing diagnostics and treatment planning. Techniques such as MRI, CT scans, and X-rays rely heavily on image processing to enhance image quality and extract meaningful information. For example, in radiology, image processing algorithms help in the early detection of diseases like cancer by improving the clarity and contrast of medical images, allowing for more accurate diagnoses. A research paper by Litjens et al. (2017) \cite{litjens_survey_2017} reviews the application of deep learning in medical imaging, showcasing its potential in improving diagnostic accuracy and efficiency, thus influencing patient outcomes positively. \subsubsection{Agriculture} -Precision agriculture benefits significantly from image processing, where it is used for crop monitoring, disease detection, and yield estimation. Drones equipped with multispectral cameras capture images of fields, which are then processed to assess plant health and detect stress factors such as pests or nutrient deficiencies. This enables farmers to make informed decisions, optimizing resource use and improving crop yields. A case study by Maimaitijiang et al. (2019) demonstrates the use of UAV-based hyperspectral imaging for monitoring crop growth, highlighting its effectiveness in enhancing agricultural productivity. +Precision agriculture benefits significantly from image processing, where it is used for crop monitoring, disease detection, and yield estimation. Drones equipped with multispectral cameras capture images of fields, which are then processed to assess plant health and detect stress factors such as pests or nutrient deficiencies. This enables farmers to make informed decisions, optimizing resource use and improving crop yields. A case study by Maimaitijiang et al. 
(2020) \cite{maimaitijiang_soybean_2020} demonstrates the use of UAV-based hyperspectral imaging for monitoring crop growth, highlighting its effectiveness in enhancing agricultural productivity. \subsubsection{Automotive Industry} -In the automotive sector, image processing is integral to the development of autonomous vehicles. Advanced driver-assistance systems (ADAS) rely on image processing to interpret data from cameras and sensors, enabling features such as lane departure warnings, adaptive cruise control, and automatic parking. These systems enhance vehicle safety and pave the way for fully autonomous driving. A study by Janai et al. (2020) discusses the role of computer vision in autonomous vehicles, emphasizing the importance of real-time image processing in ensuring safe and efficient vehicle operation. +In the automotive sector, image processing is integral to the development of autonomous vehicles. Advanced driver-assistance systems (ADAS) rely on image processing to interpret data from cameras and sensors, enabling features such as lane departure warnings, adaptive cruise control, and automatic parking. These systems enhance vehicle safety and pave the way for fully autonomous driving. A study by Janai et al. (2021) \cite{janai_computer_2021} discusses the role of computer vision in autonomous vehicles, emphasizing the importance of real-time image processing in ensuring safe and efficient vehicle operation. \subsubsection{Retail and E-commerce} -Retail and e-commerce industries leverage image processing for inventory management, customer analytics, and personalized marketing. In inventory management, image processing systems track stock levels and identify misplaced items, streamlining operations and reducing labor costs. In customer analytics, facial recognition and sentiment analysis provide insights into customer behavior and preferences, enabling personalized marketing strategies. A paper by Ren et al. 
(2019) explores the application of image processing in retail, highlighting its impact on enhancing customer experience and operational efficiency. +Retail and e-commerce industries leverage image processing for inventory management, customer analytics, and personalized marketing. In inventory management, image processing systems track stock levels and identify misplaced items, streamlining operations and reducing labor costs. In customer analytics, facial recognition and sentiment analysis provide insights into customer behavior and preferences, enabling personalized marketing strategies. A paper by Ren et al. (2016) \cite{ren_faster_2016} explores the application of image processing in retail, highlighting its impact on enhancing customer experience and operational efficiency. % References -% - Zhang, Y., Wang, S., & Liu, Y. (2020). Surface defect detection using convolutional neural networks. *Journal of Manufacturing Processes*, 49, 1-9. +% - Zhang, Y., Wang, S., & Liu, Y. (2023). Surface defect detection using convolutional neural networks. *Journal of Manufacturing Processes*, 49, 1-9. +% \cite{zhang_efficient_2023} % - Litjens, G., Kooi, T., Bejnordi, B. E., Setio, A. A. A., Ciompi, F., Ghafoorian, M., ... & van Ginneken, B. (2017). A survey on deep learning in medical image analysis. *Medical Image Analysis*, 42, 60-88. +% \cite{litjens_survey_2017} % - Maimaitijiang, M., Sagan, V., Sidike, P., Hartling, S., Esposito, F., Fritschi, F. B., & Prasad, S. (2019). Soybean yield prediction from UAV using multimodal data fusion and deep learning. *Remote Sensing of Environment*, 233, 111-117. -% - Janai, J., Güney, F., Behl, A., & Geiger, A. (2020). Computer vision for autonomous vehicles: Problems, datasets and state of the art. *Foundations and Trends® in Computer Graphics and Vision*, 12(1-3), 1-308. -% - Ren, S., He, K., Girshick, R., & Sun, J. (2019). Faster R-CNN: Towards real-time object detection with region proposal networks. 
*IEEE Transactions on Pattern Analysis and Machine Intelligence*, 39(6), 1137-1149. +% \cite{maimaitijiang_soybean_2020} +% - Janai, J., Güney, F., Behl, A., & Geiger, A. (2021). Computer vision for autonomous vehicles: Problems, datasets and state of the art. *Foundations and Trends® in Computer Graphics and Vision*, 12(1-3), 1-308. +% \cite{janai_computer_2021} +% - Ren, S., He, K., Girshick, R., & Sun, J. (2016). Faster R-CNN: Towards real-time object detection with region proposal networks. *IEEE Transactions on Pattern Analysis and Machine Intelligence*, 39(6), 1137-1149. +% \cite{ren_faster_2016} \subsection{The Strategic Importance of Image Processing Libraries} In the rapidly evolving landscape of industrial applications, the demand for efficient, adaptable, and scalable image processing libraries has become increasingly critical. These libraries serve as the backbone for a myriad of applications ranging from quality control in manufacturing to advanced robotics and autonomous systems. The benefits of employing such libraries are manifold, including reduced time-to-market, enhanced product quality, and cost efficiency, all of which are pivotal for maintaining competitive advantage in the industrial sector. -Firstly, efficient image processing libraries significantly reduce the time-to-market for new products and technologies. In industries where innovation cycles are short and competition is fierce, the ability to quickly develop and deploy new solutions is crucial. Efficient libraries streamline the development process by providing pre-built, optimized functions that developers can readily integrate into their systems. This reduces the need for writing complex algorithms from scratch, thereby accelerating the development timeline. 
For instance, libraries like OpenCV and TensorFlow offer a wide array of tools and functions that can be easily adapted to specific industrial needs, allowing companies to focus on innovation rather than the intricacies of image processing (Bradski, 2000; Abadi et al., 2016). +Firstly, efficient image processing libraries significantly reduce the time-to-market for new products and technologies. In industries where innovation cycles are short and competition is fierce, the ability to quickly develop and deploy new solutions is crucial. Efficient libraries streamline the development process by providing pre-built, optimized functions that developers can readily integrate into their systems. This reduces the need for writing complex algorithms from scratch, thereby accelerating the development timeline. For instance, libraries like OpenCV and TensorFlow offer a wide array of tools and functions that can be easily adapted to specific industrial needs, allowing companies to focus on innovation rather than the intricacies of image processing \cite{bradski_learning_2008}. -Adaptability is another critical factor that underscores the importance of these libraries. Industrial environments are often dynamic, with varying requirements and conditions that necessitate flexible solutions. Scalable image processing libraries can be tailored to meet specific needs, whether it involves adjusting to different hardware configurations or integrating with other software systems. This adaptability ensures that companies can respond swiftly to changes in market demands or technological advancements without overhauling their entire system architecture. For example, the modular nature of libraries like Halide allows for easy customization and optimization for different hardware platforms, enhancing their applicability across diverse industrial scenarios (Ragan-Kelley et al., 2013). +Adaptability is another critical factor that underscores the importance of these libraries. 
Industrial environments are often dynamic, with varying requirements and conditions that necessitate flexible solutions. Scalable image processing libraries can be tailored to meet specific needs, whether it involves adjusting to different hardware configurations or integrating with other software systems. This adaptability ensures that companies can respond swiftly to changes in market demands or technological advancements without overhauling their entire system architecture. For example, the modular nature of libraries like Halide allows for easy customization and optimization for different hardware platforms, enhancing their applicability across diverse industrial scenarios \cite{ragan-kelley_halide_2013}. -Moreover, the use of scalable image processing libraries contributes to enhanced product quality. In industries such as automotive manufacturing or pharmaceuticals, precision and accuracy are paramount. Advanced image processing capabilities enable more rigorous quality control processes, ensuring that defects are detected and rectified early in the production cycle. This not only improves the quality of the final product but also minimizes waste and reduces the likelihood of costly recalls. Studies have shown that implementing robust image processing solutions can lead to significant improvements in defect detection rates and overall product reliability (Szeliski, 2010). +Moreover, the use of scalable image processing libraries contributes to enhanced product quality. In industries such as automotive manufacturing or pharmaceuticals, precision and accuracy are paramount. Advanced image processing capabilities enable more rigorous quality control processes, ensuring that defects are detected and rectified early in the production cycle. This not only improves the quality of the final product but also minimizes waste and reduces the likelihood of costly recalls. 
Studies have shown that implementing robust image processing solutions can lead to significant improvements in defect detection rates and overall product reliability \cite{szeliski_image_2022}. -Cost efficiency is another significant advantage offered by these libraries. By leveraging open-source or commercially available image processing tools, companies can reduce the costs associated with software development and maintenance. These libraries often come with extensive documentation and community support, which can further reduce the need for specialized training and technical support. Additionally, the ability to scale solutions according to demand means that companies can optimize their resource allocation, investing only in the capabilities they need at any given time. This scalability is particularly beneficial for small and medium-sized enterprises that may not have the resources to develop custom solutions from the ground up (Russell \& Norvig, 2016). +Cost efficiency is another significant advantage offered by these libraries. By leveraging open-source or commercially available image processing tools, companies can reduce the costs associated with software development and maintenance. These libraries often come with extensive documentation and community support, which can further reduce the need for specialized training and technical support. Additionally, the ability to scale solutions according to demand means that companies can optimize their resource allocation, investing only in the capabilities they need at any given time. This scalability is particularly beneficial for small and medium-sized enterprises that may not have the resources to develop custom solutions from the ground up \cite{russell_artificial_2016}. % References -% - Bradski, G. (2000). The OpenCV Library. *Dr. Dobb's Journal of Software Tools*. +% - Bradski, G. (2008). *Learning OpenCV: Computer Vision with the OpenCV Library*. O'Reilly Media. 
+% \cite{bradski_learning_2008} % - Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., ... & Zheng, X. (2016). TensorFlow: A System for Large-Scale Machine Learning. In *12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)* (pp. 265-283). +% \cite{abadi_tensorflow_2016} % - Ragan-Kelley, J., Barnes, C., Adams, A., Paris, S., Durand, F., & Amarasinghe, S. (2013). Halide: A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Processing Pipelines. *ACM SIGPLAN Notices*, 48(6), 519-530. -% - Szeliski, R. (2010). *Computer Vision: Algorithms and Applications*. Springer Science & Business Media. -% - Russell, S., & Norvig, P. (2016). *Artificial Intelligence: A Modern Approach*. Pearson. \ No newline at end of file +% \cite{ragan-kelley_halide_2013} +% - Szeliski, R. (2022). *Computer Vision: Algorithms and Applications*. Springer Science & Business Media. +% \cite{szeliski_image_2022} +% \cite{szeliski_introduction_2022} +% - Russell, S., & Norvig, P. (2016). *Artificial Intelligence: A Modern Approach*. Pearson. +% \cite{russell_artificial_2016} \ No newline at end of file diff --git a/sections/Chapter-1-sections/Related-Work.tex b/sections/Chapter-1-sections/Related-Work.tex index 7d2393700efa87c91fefc9a1b0e8014c7d1d2f9e..7c81df244e3debd498a2097edd3144e6426442ff 100644 --- a/sections/Chapter-1-sections/Related-Work.tex +++ b/sections/Chapter-1-sections/Related-Work.tex @@ -1,223 +1,25 @@ \section{Related Work} -In this chapter, we review and synthesize research studies that relate to the evaluation of image processing libraries and their applications in industrial and specialized contexts. The selected literature spans diverse topics—from hardware acceleration and real-time processing to quality assessment databases and comprehensive machine vision frameworks. 
Although not every study addresses the thesis topic directly, each work contributes insights into performance, resource efficiency, and integration challenges. These aspects are critical when comparing image processing libraries for industrial applications. +The evaluation of image processing libraries, particularly for industrial applications, has attracted significant research interest over the past decades. Broadly, the field encompasses automated image analysis and computer vision systems designed to handle tasks such as quality control, defect detection, and high-resolution image enhancement. The foundational research in Automated Image Processing has evolved from early, often ad hoc, implementations to sophisticated frameworks that leverage hardware acceleration and advanced algorithms. Early surveys, such as Kulpa’s (1981) \cite{kulpa_universal_1981} seminal review of digital image processing systems in Europe, laid the groundwork for understanding the challenges of standardization and performance evaluation in these systems. -%%% +In recent years, the convergence of hardware acceleration and image analysis has been a recurring theme. Sahebi et al. (2023) \cite{sahebi_distributed_2023} demonstrate how distributed processing on FPGAs can dramatically enhance computational efficiency—a principle equally applicable to industrial image processing where real-time performance is critical. Similarly, Ma et al. (2024) \cite{ma_new_2024} contribute to the field by presenting an image quality database specifically tailored for industrial processes. Their work emphasizes the importance of aligning objective metrics with human perception in quality assessments, a concern that resonates throughout subsequent research in the area. -\subsection{Distributed Large-Scale Graph Processing on FPGAs (Sahebi et al., 2023)} +Chisholm et al. (2020) \cite{chisholm_fpga-based_2020} and Ferreira et al. 
(2024) \cite{ferreira_generic_2024} extend these discussions by focusing on the implementation of real-time image processing systems using FPGAs. Chisholm et al. illustrate a real-time crack detection system employing particle filters, highlighting the challenges of meeting stringent timing constraints in industrial settings. Ferreira et al., on the other hand, propose a generic FPGA-based pre-processing library, emphasizing strategies to minimize memory overhead and improve processing speed. These studies underscore the significant role of hardware acceleration in modern image processing pipelines, setting the stage for more nuanced comparative evaluations. -Sahebi et al. (2023) present an innovative approach to large-scale graph processing using FPGAs and distributed computing frameworks. Although the paper focuses on graph data rather than traditional image processing, the methodologies and optimization strategies discussed are highly pertinent to industrial image processing tasks. The authors introduce a novel model that leverages Hadoop to distribute graph processing workloads across multiple workers, including FPGAs, which significantly improves processing speed and efficiency. +A critical aspect of the research is the comparative analysis of different image processing libraries. Lai et al. (2001) \cite{lai_image_2001} provide an in-depth review of several libraries, contrasting hardware-specific optimizations with generic, portable solutions. Their work not only identifies the strengths and weaknesses inherent in different design philosophies but also serves as a benchmark against which later approaches can be compared. Kulpa’s early survey (1981) \cite{kulpa_universal_1981} remains an important historical reference, offering insights into the evolution of image processing systems and highlighting persistent issues such as limited standardization and documentation. 
-The paper details how the proposed system partitions large graphs into smaller chunks—an approach that minimizes external memory accesses, which is critical when dealing with limited on-chip memory. This technique parallels the challenges encountered in processing high-resolution industrial images, where efficient data partitioning is vital to reduce latency. The study demonstrates speedups of up to 2x, 4.4x, and 26x compared to traditional CPU, GPU, and FPGA solutions, respectively. These improvements underscore the potential benefits of hardware acceleration, a concept that is directly transferable to the evaluation of image processing libraries. +Pérez et al. (2014) \cite{perez_super-resolution_2014} contribute by investigating super-resolution techniques for plenoptic cameras via FPGA-based implementations, demonstrating that hardware acceleration can significantly improve both processing speed and image quality. Meanwhile, Rao’s (2023) \cite{rao_comparative_2023} comparative analysis of deep learning frameworks extends the conversation by incorporating performance metrics, documentation quality, and community support. This approach is particularly valuable as it parallels the metrics used to evaluate traditional image processing libraries, thereby bridging the gap between classical image processing and modern deep learning paradigms. -Moreover, the work emphasizes resource efficiency and the importance of minimizing memory overhead. The FPGA-based solution required careful design to ensure that processing kernels used minimal resources, thereby enabling increased parallelism. For industrial applications where large image datasets must be processed in real time, similar design principles—such as minimizing data transfers and efficiently partitioning workloads—are crucial. 
By adapting these principles, the current thesis evaluates how various image processing libraries can leverage hardware acceleration to achieve improved performance under resource constraints. +Several studies have explored niche industrial applications where image processing plays a critical role. Ciora and Simion (2014) \cite{ciora_industrial_2014} provide a broad overview of the applications of image processing in industrial engineering, covering areas from automated visual inspection to process control. Their comprehensive review underscores the necessity of robust, efficient image processing systems that integrate seamlessly with industrial control mechanisms. -In summary, Sahebi et al. provide valuable insights into distributed processing and hardware optimization techniques. Their research serves as a foundational reference for understanding how similar strategies can be employed to enhance the performance and resource efficiency of image processing libraries in industrial contexts. +In a more focused domain, Sandvik et al. (2024) \cite{sandvik_comparative_2024} review machine learning and image processing techniques for wood log scaling and grading. Their systematic categorization of methodologies offers a template for benchmarking approaches that combine computer vision with domain-specific performance metrics. Sardar (2012) \cite{sardar_role_2012} examines the use of image processing for quality analysis in agriculture, further highlighting the versatility of these technologies across different industrial sectors. -%%% +Vieira et al. (2024) \cite{vieira_performance_2024} address the challenges of deploying image processing algorithms on Programmable Logic Controllers (PLCs), which are prevalent in industrial control systems. Their work illustrates the trade-offs between processing speed, implementation complexity, and system robustness when operating in resource-constrained environments. Wu et al. (2022) \cite{wu_precision_2022} and Zhu et al. 
(2022) \cite{zhu_machine_2022} then delve into specific industrial applications—precision control in filament drafting and product appearance quality inspection, respectively—demonstrating the critical impact of real-time processing and integration on system performance. -\subsection{A New Image Quality Database for Multiple Industrial Processes (Ma et al., 2024)} +At the forefront of current research are studies that provide robust benchmarking frameworks. Reis (2023) \cite{reis_developments_2023} offers an overview of recent developments in computer vision and image processing methodologies, pointing out the increasing integration of artificial intelligence with classical approaches. This evolution is complemented by Ziaja et al. (2021) \cite{ziaja_benchmarking_2021}, whose work on benchmarking deep learning for on-board space applications provides a rigorous framework for evaluating execution time, resource utilization, and overall performance under constrained hardware conditions. -Ma et al. (2024) introduce the Industrial Process Image Database (IPID), a specialized resource designed to assess image quality in complex industrial environments. The authors generated a database of 3000 distorted images derived from 50 high-quality source images, incorporating a range of distortions in terms of type and degree. This database aims to provide a standardized benchmark for evaluating image quality assessment (IQA) algorithms, which is crucial for applications where visual inspection plays a key role. +These contemporary evaluations are essential for highlighting the limitations of existing approaches. While many studies focus on performance metrics such as processing speed and memory efficiency, few have systematically integrated these factors with ease of integration and system robustness in industrial settings. 
This gap in the literature motivates the present study, which aims to establish a comprehensive benchmarking approach that encompasses both hardware acceleration and software flexibility. -The study’s methodology involves subjective scoring experiments that align objective quality metrics with human perception. Such alignment is particularly important in industrial settings where visual quality is paramount. The IPID includes images captured under diverse lighting conditions, atmospheric variations, and realistic industrial scenarios (e.g., production lines and warehouses). This diversity ensures that the benchmark reflects the multifaceted nature of real-world industrial imaging challenges. +In summary, the reviewed literature presents a rich tapestry of methodologies and evaluations that span a broad spectrum of industrial image processing applications. Early foundational works provided historical context and identified critical challenges, while subsequent studies advanced the field by integrating hardware acceleration, deep learning, and niche industrial applications into comprehensive performance evaluations. Despite these advances, a clear gap remains in the standardization of benchmarking protocols that address performance, resource efficiency, and integration challenges in real-world industrial settings. This thesis proposes a novel benchmarking approach that differentiates itself by not only comparing the computational performance of various image processing libraries but also by evaluating their ease of integration into complex industrial workflows. By doing so, the study seeks to provide actionable insights for practitioners and pave the way for the next generation of robust, efficient, and versatile image processing solutions. -The work reveals that many existing IQA algorithms exhibit low correlation with subjective assessments, indicating that current methods struggle to capture the nuances of image quality as perceived by human operators. 
For the present thesis, these findings underscore the importance of not only evaluating raw performance metrics of image processing libraries (such as speed and memory usage) but also considering the impact on image quality, especially in applications where image distortion can affect critical outcomes. -Ma et al.’s contribution is significant because it establishes a robust framework for benchmarking image processing techniques against a realistic and diverse dataset. The IPID serves as a critical tool for comparing how different libraries manage image distortions and maintain quality under industrial conditions. Such a framework is directly applicable to the current research, which seeks to evaluate the robustness and efficiency of various image processing libraries in handling complex, real-world data. - -%%% - -\subsection{FPGA-Based Design for Real-Time Crack Detection Using Particle Filters (Chisholm et al., 2020)} - -Chisholm et al. (2020) focus on the development of a real-time crack detection system using FPGAs, which is an exemplary case of applying image processing for industrial quality control. The authors implement particle filter-based algorithms to identify and measure cracks in real time, a task critical for maintenance and safety in industrial infrastructures. - -The study is notable for its comprehensive evaluation of both detection accuracy and computational performance. By comparing parameters such as measurement precision, processing speed, physical footprint, and energy consumption, the authors demonstrate the advantages of employing hardware-accelerated solutions in time-sensitive applications. Their system achieves real-time processing by tightly integrating the image processing algorithms with FPGA hardware, ensuring minimal latency. - -This work is directly relevant to the current thesis because it highlights how real-time image processing can be achieved in resource-constrained industrial environments. 
The study discusses the challenges associated with real-world implementation, including the need to process large image datasets under stringent time constraints. The authors emphasize the importance of optimizing algorithms for parallel execution, which directly informs the evaluation of image processing libraries in terms of their ability to support hardware acceleration and real-time processing. - -Moreover, the paper outlines the integration of the detection system with broader industrial control mechanisms, illustrating the need for seamless interoperability between image processing libraries and other system components. Such integration is a key factor in the present research, as the overall effectiveness of an image processing library in an industrial setting depends not only on its computational performance but also on its ease of integration into existing industrial workflows. - -In conclusion, Chisholm et al. provide a compelling demonstration of hardware-accelerated, real-time image processing in an industrial application. Their findings contribute important criteria—such as processing speed, accuracy, and energy efficiency—that are used to benchmark and evaluate the image processing libraries discussed in this thesis. - -%%% - -\subsection{Industrial Applications of Image Processing (Ciora and Simion, 2014)} - -Ciora and Simion (2014) offer a broad overview of the applications of image processing in industrial engineering. Their review examines a wide range of practical implementations, including automated visual inspection, process control, part identification, and robotic guidance. The paper serves as a foundational reference by contextualizing the role of image processing in modern industrial settings. - -The authors highlight that industrial image processing systems must meet rigorous standards of accuracy and reliability. 
They discuss various techniques—such as feature extraction, object recognition, and pattern recognition—and illustrate how these methods are applied in real-world industrial scenarios. For instance, the paper reviews the use of machine vision for monitoring assembly lines, detecting defects in manufactured parts, and guiding robotic systems. These applications underscore the critical role that image processing plays in ensuring quality control and operational efficiency. - -One of the key contributions of this work is its emphasis on the integration of image processing algorithms with industrial control systems. The authors note that a successful image processing solution in an industrial environment must not only perform well in isolation but also interface effectively with hardware and software systems that drive production processes. This insight is directly relevant to the present thesis, which evaluates image processing libraries not just on performance metrics but also on their compatibility with industrial applications. - -Additionally, Ciora and Simion discuss the challenges inherent in implementing image processing systems, such as the need for robust data acquisition and handling large volumes of image data in real time. These challenges highlight the importance of developing efficient algorithms and utilizing hardware acceleration—key themes that are explored in the current research. - -Overall, this comprehensive review provides essential background information on the state of industrial image processing. It establishes the importance of robust, efficient, and well-integrated image processing systems, thereby setting the stage for the subsequent evaluation of various image processing libraries within this thesis. - -%%% - -\subsection{Generic FPGA Pre-Processing Image Library for Industrial Vision Systems (Ferreira et al., 2024)} - -Ferreira et al. 
(2024) focus on the development of a generic library of pre-processing filters designed specifically for implementation on FPGAs within industrial vision systems. The paper addresses the critical need for accelerating image processing tasks to meet the demands of modern industrial applications. By leveraging the parallel processing capabilities of FPGAs, the authors demonstrate substantial improvements in processing times, reducing latency from milliseconds to nanoseconds in certain cases. - -A key aspect of the study is its emphasis on resource efficiency. The authors detail how their FPGA-based solution minimizes memory accesses and optimizes data partitioning to reduce external memory overhead. These strategies are particularly relevant to industrial scenarios, where high-resolution images and large datasets are common, and any delay in processing can result in significant bottlenecks. - -The experimental results presented in the paper reveal that the proposed pre-processing library significantly outperforms traditional CPU and GPU implementations under specific conditions. The study also discusses the trade-offs involved in developing FPGA solutions, notably the longer development time and the requirement for specialized hardware description languages. However, the performance gains achieved through hardware acceleration justify these additional efforts, especially in time-critical industrial applications. - -This work is directly applicable to the thesis, as it highlights the importance of optimizing image processing pipelines through hardware acceleration. The detailed discussion of data partitioning strategies, memory management, and resource allocation provides a framework that can be used to evaluate the resource efficiency of various image processing libraries. Furthermore, the emphasis on reducing processing time and achieving high throughput aligns with the thesis’s objectives of comparing library performance in real-world industrial scenarios. 
- -In summary, Ferreira et al. make a significant contribution by demonstrating how FPGA-based pre-processing can be leveraged to enhance the performance of image processing systems. Their insights into hardware acceleration, memory optimization, and efficient data partitioning are critical for understanding the challenges and opportunities associated with modern industrial image processing. - -%%% - -\subsection{Universal Digital Image Processing Systems in Europe – A Comparative Survey (Kulpa, 1981)} - -Although dated, Kulpa’s (1981) survey remains a seminal work in the field of digital image processing. This early comparative study provides a historical perspective on the evolution of image processing systems in Europe and serves as an important reference for understanding the foundational challenges that continue to influence modern systems. - -Kulpa’s survey evaluates eleven universal image processing systems developed across various European countries. The study categorizes these systems based on their design goals, technological approaches, and application domains. A significant observation made by Kulpa is that many of these early systems were designed in an ad hoc manner, with limited documentation and a lack of standardized evaluation methodologies. This lack of standardization led to difficulties in comparing system performance and functionality, a challenge that persists in the evaluation of contemporary image processing libraries. - -The survey also highlights the diversity of image processing approaches, ranging from systems developed for research purposes to those intended for commercial applications. Kulpa emphasizes the importance of systematic software design and clear documentation—principles that remain crucial in modern software engineering. The insights provided in this survey lay the groundwork for the evolution of more structured and comparable image processing systems. 
- -For the current thesis, Kulpa’s work offers a valuable historical context that underscores the progress made over the past decades. It also reinforces the need for standardized benchmarking and systematic evaluation of image processing libraries, which is a central theme in the current research. By understanding the challenges encountered by early systems, researchers can better appreciate the trade-offs and design decisions inherent in modern image processing frameworks. - -In essence, this historical survey not only contextualizes the evolution of image processing systems but also highlights enduring challenges—such as standardization and systematic evaluation—that are critical to the development and assessment of contemporary image processing libraries. - -%%% - -\subsection{Image Processing Libraries: A Comparative Review (Lai et al., 2001)} - -Lai et al. (2001) provide an in-depth comparative review of several image processing library implementations, including Datacube’s ImageFlow, the Vector, Signal and Image Processing Library (VSIPL), and Vision with Generic Algorithms (VIGRA). This review is particularly valuable as it examines different design philosophies and approaches to building image processing libraries, ranging from vendor-specific solutions to hardware-neutral and generic programming-based libraries. - -The paper discusses the strengths and weaknesses of each implementation. For instance, Datacube’s ImageFlow is designed to leverage specific hardware capabilities, offering optimized performance through vendor-specific enhancements. In contrast, VSIPL emphasizes portability and hardware neutrality, ensuring that the library can be deployed across various platforms without significant modifications. VIGRA, built on generic programming principles, aims to offer flexibility and ease of integration without incurring substantial performance penalties. 
- -The comparative analysis in this study focuses on several key criteria, including processing speed, memory management, ease of integration, and the flexibility of the programming model. Lai et al. argue that the choice between a hardware-specific solution and a generic, portable one depends on the specific application requirements. For industrial applications, where performance and resource efficiency are critical, the trade-offs between these approaches must be carefully evaluated. - -This paper contributes significantly to the literature by providing a framework for understanding how different design choices impact overall performance and usability. The insights regarding vendor-specific optimizations versus generic programming approaches directly inform the evaluation criteria for the current thesis. By comparing these distinct paradigms, the study underscores the importance of balancing performance with portability and ease of integration—a balance that is central to the comparative evaluation of image processing libraries in this research. - -Overall, Lai et al. offer a comprehensive review that highlights the evolution and diversity of image processing libraries. Their analysis provides a solid foundation for understanding the trade-offs involved in library design, which is instrumental for evaluating and selecting the most appropriate image processing solution for industrial applications. - -%%% - -\subsection{Super-Resolution in Plenoptic Cameras Using FPGAs (Pérez et al., 2014)} - -Pérez et al. (2014) explore the implementation of super-resolution algorithms for plenoptic cameras using FPGA-based solutions. Although the application domain—plenoptic imaging—differs from general industrial image processing, the study’s focus on leveraging hardware acceleration to improve image quality and processing speed is directly relevant to the present thesis. 
- -The authors demonstrate how FPGAs can be used to implement super-resolution algorithms, which enhance the spatial resolution of images captured by plenoptic cameras. Their work highlights several advantages of FPGA-based solutions, including parallel processing capabilities, low power consumption, and the ability to perform complex image enhancement tasks in real time. The study also provides a detailed account of the trade-offs involved in implementing such algorithms, including the challenges of balancing processing speed with hardware resource constraints. - -One of the key contributions of this paper is its demonstration of how hardware acceleration can significantly reduce processing times while maintaining high image quality. The authors report that their FPGA implementation achieved substantial performance improvements compared to traditional CPU-based methods, a finding that underscores the potential benefits of integrating hardware acceleration into image processing pipelines. - -For the current thesis, Pérez et al.’s research offers important insights into the design and optimization of image processing systems for high-performance applications. Their emphasis on parallel processing and efficient resource management provides a valuable framework for evaluating how different image processing libraries can leverage hardware acceleration features. Furthermore, the study’s detailed performance analysis, which considers both execution time and resource utilization, aligns closely with the evaluation criteria used in this thesis. - -In conclusion, the work by Pérez et al. serves as a compelling example of how FPGA-based hardware acceleration can enhance the capabilities of image processing algorithms. The lessons learned from this study—particularly regarding the optimization of processing pipelines and the efficient use of hardware resources—are directly applicable to the comparative evaluation of image processing libraries in industrial settings. 
- -Below is Part 2 of the expanded Related Work chapter, covering Sections 2.9 through 2.16 and concluding with an overall synthesis. - -%%% - -\subsection{Comparative Analysis of Deep Learning Frameworks and Libraries (Rao, 2023)} - -Rao (2023) provides a comprehensive comparison of deep learning frameworks—including TensorFlow, PyTorch, Keras, MXNet, and Caffe—focusing on criteria such as performance, ease of use, documentation, and community support. Although the primary focus is on deep learning rather than traditional image processing, the methodology employed in this study offers valuable insights for evaluating software libraries. - -The paper benchmarks each framework using standardized tasks and datasets, assessing execution speed and memory consumption. Rao’s analysis reveals that TensorFlow and PyTorch excel in high-performance scenarios, while Keras is noted for its accessibility to beginners. The systematic approach taken by Rao—employing both quantitative and qualitative metrics—serves as a model for how image processing libraries can be evaluated on similar dimensions. In the context of this thesis, the criteria used by Rao inform the selection of performance and usability metrics, particularly in environments where both deep learning and traditional image processing techniques may be integrated. - -%%% - -\subsection{Developments of Computer Vision and Image Processing: Methodologies and Applications (Reis, 2023)} - -Reis (2023) offers an editorial overview of recent advances in computer vision and image processing, emphasizing the evolution of methodologies and their application across various domains. This piece underscores the increasing integration of artificial intelligence and deep learning with classical image processing, and it highlights emerging trends that have influenced modern system design. 
- -Reis discusses a range of methodologies—from conventional algorithms to more recent deep learning-based techniques—and illustrates how these approaches are applied in areas such as object detection, segmentation, and quality inspection. Although the article is broad in scope, it provides critical context for the present thesis by outlining both the challenges and opportunities that arise when integrating diverse image processing techniques. The insights provided in this overview underscore the importance of methodological rigor and the need for comprehensive evaluation frameworks that encompass both accuracy and efficiency. - -%%% - -\subsection{Comparative Literature Review of Machine Learning and Image Processing Techniques for Wood Log Scaling and Grading (Sandvik et al., 2024)} - -Sandvik et al. (2024) conduct a systematic literature review that compares various machine learning and image processing techniques applied to the scaling and grading of wood logs. This review categorizes studies based on input types, algorithm choices, performance outcomes, and the level of autonomy in industrial applications. - -The authors highlight a trend towards the increased use of camera-based imaging as opposed to laser scanning, and they emphasize the superior performance of deep learning models in tasks such as log segmentation and grading. While the application domain is specific to wood logs, the review’s methodology—particularly the rigorous categorization and performance comparison—offers a template for evaluating image processing libraries in broader industrial contexts. The challenges identified in comparing heterogeneous approaches, such as varying datasets and evaluation criteria, also reinforce the need for standardized benchmarking protocols, an area that this thesis seeks to address. 
- -%%% - -\subsection{The Role of Computer Systems in Comparative Analysis Using Image Processing to Promote Agriculture Business (Sardar, 2012)} - -Sardar (2012) explores the application of image processing techniques for quality analysis in the agricultural sector, focusing specifically on the assessment of fruit quality. Although the agricultural context differs from general industrial applications, the underlying principles of computer vision for automated quality control are directly relevant. - -Sardar’s work describes a system that uses RGB color analysis to grade fruits, highlighting both the strengths and limitations of digital image processing for quality assessment. The paper discusses challenges such as variability in lighting conditions and the need for precise color calibration, issues that are also pertinent in industrial image processing scenarios. By addressing these challenges, Sardar’s study provides valuable lessons on designing robust image processing systems that can maintain accuracy and consistency—an insight that is integrated into the evaluation criteria for image processing libraries in this thesis. - -%%% - -\subsection{Performance Evaluation of Computer Vision Algorithms on Programmable Logic Controllers (Vieira et al., 2024)} - -Vieira et al. (2024) examine the feasibility of deploying computer vision algorithms on Programmable Logic Controllers (PLCs), which are widely used in industrial control systems. This study is particularly significant because it evaluates the performance of standard image processing algorithms when executed on hardware platforms with constrained resources. - -The authors compare the performance of PLC-based image processing with that of traditional computer systems, considering factors such as execution time, implementation complexity, and system robustness. 
The research identifies trade-offs between simplicity, reliability, and processing power, emphasizing that while PLCs may not offer the same raw performance as high-end computers, they are often sufficient for industrial applications that require tight integration with control systems. - -This paper is directly relevant to the current thesis, as it informs the discussion on resource efficiency and the practical challenges of implementing image processing libraries in industrial environments. The evaluation criteria developed by Vieira et al.—particularly regarding the balance between processing performance and ease of integration—are mirrored in the present research. - -%%% - -\subsection{Precision Control of Polyurethane Filament Drafting and Winding Based on Machine Vision (Wu et al., 2022)} - -Wu et al. (2022) explore the application of machine vision for precision control in the drafting and winding of polyurethane filaments. The study demonstrates how real-time image processing can be integrated into industrial manufacturing processes to enhance control accuracy and product quality. - -The authors detail the development of a system that synchronizes machine vision with control mechanisms to monitor and adjust the drafting process in real time. Key performance indicators such as detection accuracy, processing latency, and control responsiveness are evaluated to determine the system’s effectiveness. Wu et al. emphasize the importance of achieving high precision in industrial applications, where even minor deviations can lead to significant defects. - -The relevance of this study to the current thesis lies in its demonstration of how image processing libraries can be leveraged to achieve real-time control in manufacturing. The performance metrics and integration challenges discussed in this work provide a benchmark for evaluating similar capabilities in image processing libraries, particularly in terms of their suitability for real-time industrial applications. 
- -%%% - -\subsection{A Machine Vision Development Framework for Product Appearance Quality Inspection (Zhu et al., 2022)} - -Zhu et al. (2022) propose a comprehensive machine vision framework designed for product appearance quality inspection. This study addresses both the algorithmic and system integration aspects of machine vision in industrial settings, emphasizing the need for modular, reusable components that can be easily adapted to various inspection tasks. - -The framework developed by Zhu et al. incorporates a range of image processing techniques—from basic feature extraction and segmentation to advanced anomaly detection using deep learning. The authors stress that the effectiveness of such systems depends not only on the performance of individual image processing algorithms but also on the overall software architecture, including user interfaces, database management, and input/output communication. - -The modular design advocated by Zhu et al. is particularly relevant to the thesis, as it underscores the importance of evaluating image processing libraries not only on their computational performance but also on their ability to integrate into comprehensive industrial systems. The insights from this study inform the criteria for assessing scalability, ease of integration, and overall system robustness in the comparative evaluation conducted in this research. - -%%% - -\subsection{Benchmarking Deep Learning for On-Board Space Applications (Ziaja et al., 2021)} - -Ziaja et al. (2021) focus on benchmarking deep learning algorithms for hardware-constrained environments, such as those used in on-board space applications. While the domain of space imaging differs from industrial applications, the methodological rigor and benchmarking framework presented in this study offer valuable lessons for evaluating image processing libraries. 
- -The paper describes a detailed experimental setup in which various deep learning models are benchmarked on standardized datasets, with a focus on metrics such as execution time, resource utilization, and model accuracy. Ziaja et al. emphasize the importance of tailoring performance evaluations to the specific constraints of the hardware, a concept that is directly applicable to industrial image processing where systems often operate under limited computational resources. - -The study’s approach to parameter tuning, model optimization, and the use of standardized benchmarks provides a robust framework for performance evaluation. These methodologies are particularly useful for the present thesis, which seeks to develop a comprehensive, multidimensional evaluation of image processing libraries based on both performance and resource efficiency. The insights from Ziaja et al. reinforce the necessity of developing configurable benchmarking tools that can accurately capture the trade-offs inherent in deploying image processing systems on various hardware platforms. - -%%% - -\subsection{Synthesis and Future Directions} - -These studies illustrate that the optimal selection of an image processing library is highly context-dependent. For real-time industrial applications, factors such as processing speed, resource efficiency, and ease of integration are paramount. The comparative analyses provided by the reviewed literature underscore that no single library is universally superior; rather, the choice must be informed by specific application requirements and operational constraints. - -Several gaps and future research directions have been identified: - -\begin{itemize} - \item \textbf{Standardization of Benchmarks:} There remains a need for universally accepted benchmarking protocols that enable direct comparisons between different image processing libraries. 
Future research should focus on developing standardized test suites that account for both performance and resource utilization. - \item \textbf{Hybrid and Modular Approaches:} The literature suggests significant potential in combining the strengths of multiple libraries. Investigating hybrid solutions that integrate hardware acceleration with flexible software architectures could yield substantial improvements in industrial applications. - \item \textbf{Longitudinal Studies:} Most existing evaluations focus on short-term performance metrics. Long-term studies that assess the stability and scalability of image processing libraries in real-world industrial settings would provide valuable insights for practitioners. - \item \textbf{Integration with Emerging Technologies:} As new hardware platforms and acceleration techniques emerge (e.g., GPUs, AI accelerators, and advanced FPGAs), further research is needed to explore how these technologies can be seamlessly integrated with image processing libraries to optimize performance and efficiency. -\end{itemize} - -In summary, the reviewed literature provides a solid foundation for the current thesis. By synthesizing insights from a range of studies, this chapter has contextualized the challenges and opportunities in evaluating image processing libraries for industrial applications. The findings from these works not only inform the performance and resource efficiency criteria used in this thesis but also suggest promising avenues for future research. - -%%% - -% References - -% Chisholm, Tim, Romulo Lins, and Sidney Givigi. “FPGA-Based Design for Real-Time Crack Detection Based on Particle Filter.” IEEE Transactions on Industrial Informatics 16, no. 9 (September 2020): 5703–11. https://doi.org/10.1109/TII.2019.2950255. -% Ciora, Radu Adrian, and Carmen Mihaela Simion. “Industrial Applications of Image Processing.” ACTA Universitatis Cibiniensis 64, no. 1 (November 1, 2014): 17–21. https://doi.org/10.2478/aucts-2014-0004.
-% Ferreira, Diogo, Filipe Moutinho, João P. Matos-Carvalho, Magno Guedes, and Pedro Deusdado. “Generic FPGA Pre-Processing Image Library for Industrial Vision Systems.” Sensors (Basel, Switzerland) 24, no. 18 (September 20, 2024): 6101. https://doi.org/10.3390/s24186101. -% Kulpa, Zenon. “Universal Digital Image Processing Systems in Europe — A Comparative Survey.” In Digital Image Processing Systems, edited by Leonard Bloc and Zenon Kulpa, 1–20. Berlin, Heidelberg: Springer, 1981. https://doi.org/10.1007/3-540-10705-3_1. -% Lai, Bing-Chang, Phillip, and Phillip McKerrow. “Image Processing Libraries,” January 1, 2001. -% Ma, Xuanchao, Yanlin Jiang, Hongyan Liu, Chengxu Zhou, and Ke Gu. “A New Image Quality Database for Multiple Industrial Processes.” arXiv, February 16, 2024. https://doi.org/10.48550/arXiv.2401.13956. -% Pérez, Joel, Eduardo Magdaleno, Fernando Pérez, Manuel Rodríguez, David Hernández, and Jaime Corrales. “Super-Resolution in Plenoptic Cameras Using FPGAs.” Sensors 14, no. 5 (May 2014): 8669–85. https://doi.org/10.3390/s140508669. -% Rao, M. Nagabhushana. “A Comparative Analysis of Deep Learning Frameworks and Libraries.” International Journal of Intelligent Systems and Applications in Engineering 11, no. 2s (January 27, 2023): 337–42. -% Reis, Manuel J. C. S. “Developments of Computer Vision and Image Processing: Methodologies and Applications.” Future Internet 15, no. 7 (July 2023): 233. https://doi.org/10.3390/fi15070233. -% Sahebi, Amin, Marco Barbone, Marco Procaccini, Wayne Luk, Georgi Gaydadjiev, and Roberto Giorgi. “Distributed Large-Scale Graph Processing on FPGAs.” Journal of Big Data 10, no. 1 (June 4, 2023): 95. https://doi.org/10.1186/s40537-023-00756-x. -% Sandvik, Yohann Jacob, Cecilia Marie Futsæther, Kristian Hovde Liland, and Oliver Tomic. “A Comparative Literature Review of Machine Learning and Image Processing Techniques Used for Scaling and Grading of Wood Logs.” Forests 15, no. 7 (July 2024): 1243. https://doi.org/10.3390/f15071243.
-% Sardar, Hassan. “A Role of Computer System for Comparative Analysis Using Image Processing to Promote Agriculture Business.” International Journal of Engineering Research and Technology, November 29, 2012. https://www.semanticscholar.org/paper/A-role-of-computer-system-for-comparative-analysis-Sardar/6e2fd48a1025b68951f511abe05f8451f753eb47. -% Vieira, Rodrigo, Dino Silva, Eliseu Ribeiro, Luís Perdigoto, and Paulo Jorge Coelho. “Performance Evaluation of Computer Vision Algorithms in a Programmable Logic Controller: An Industrial Case Study.” Sensors 24, no. 3 (January 2024): 843. https://doi.org/10.3390/s24030843. -% Wu, Shilin, Huayu Yang, Xiangyan Liu, and Rui Jia. “Precision Control of Polyurethane Filament Drafting and Winding Based on Machine Vision.” Frontiers in Bioengineering and Biotechnology 10 (September 16, 2022). https://doi.org/10.3389/fbioe.2022.978212. -% Zhu, Qiuyu, Yunxiao Zhang, Jianbing Luan, and Liheng Hu. “A Machine Vision Development Framework for Product Appearance Quality Inspection.” Applied Sciences 12, no. 22 (January 2022): 11565. https://doi.org/10.3390/app122211565. -% Ziaja, Maciej, Piotr Bosowski, Michal Myller, Grzegorz Gajoch, Michal Gumiela, Jennifer Protich, Katherine Borda, Dhivya Jayaraman, Renata Dividino, and Jakub Nalepa. “Benchmarking Deep Learning for On-Board Space Applications.” Remote Sensing 13, no. 19 (October 5, 2021): 3981. https://doi.org/10.3390/rs13193981.
diff --git a/sections/Chapter-1-sections/Relevance.tex b/sections/Chapter-1-sections/Relevance.tex index dbc24dfd809fe6b102fa02e80ae0de02fb2d5c28..de18aa7aaa8fe8057aec339dbfddd434eda60067 100644 --- a/sections/Chapter-1-sections/Relevance.tex +++ b/sections/Chapter-1-sections/Relevance.tex @@ -1,8 +1,8 @@ \section{Relevance of Image Processing Libraries in Industrial Contexts} -In the rapidly evolving landscape of industrial applications, the evaluation of image processing libraries has emerged as a critical area of focus, particularly for companies like Dassault Systèmes, a leader in 3D design, 3D digital mock-up, and product lifecycle management (PLM) software. The relevance of this evaluation extends beyond academic curiosity, delving into the practical implications that these technologies hold for enhancing operational efficiency, product quality, and innovation in industrial settings. Image processing libraries serve as the backbone for a myriad of applications, from quality control and predictive maintenance to advanced simulations and virtual prototyping, all of which are integral to the operations at Dassault Systèmes. +In the rapidly evolving landscape of industrial applications, the evaluation of image processing libraries has emerged as a critical area of focus, particularly for companies like Dassault Systèmes, a leader in 3D design, 3D digital mock-up, and product lifecycle management (PLM) software. The relevance of this evaluation extends beyond academic curiosity, exploring the practical implications that these technologies hold for enhancing operational efficiency, product quality, and innovation in industrial settings. Image processing libraries serve as the backbone for a myriad of applications, from quality control and predictive maintenance to advanced simulations and virtual prototyping, all of which are integral to the operations at Dassault Systèmes. 
-The industrial sector is increasingly reliant on sophisticated image processing techniques to automate and optimize processes, reduce human error, and improve decision-making capabilities. For instance, in quality control, image processing can detect defects in products with a precision that surpasses human capabilities, thereby ensuring higher standards of quality and reducing waste (Gonzalez \& Woods, 2018). Furthermore, in the realm of predictive maintenance, these libraries enable the analysis of visual data to predict equipment failures before they occur, thus minimizing downtime and maintenance costs (Szeliski, 2010). +The industrial sector is increasingly reliant on sophisticated image processing techniques to automate and optimize processes, reduce human error, and improve decision-making capabilities. For instance, in quality control, image processing can detect defects in products with a precision that surpasses human capabilities, thereby ensuring higher standards of quality and reducing waste (Gonzalez \& Woods, 2008). Furthermore, in the realm of predictive maintenance, these libraries enable the analysis of visual data to predict equipment failures before they occur, thus minimizing downtime and maintenance costs (Szeliski, 2010). For Dassault Systèmes, whose software solutions are pivotal in designing and managing complex industrial systems, the choice of image processing libraries can significantly impact the performance and capabilities of their products. By evaluating and selecting the most efficient and robust libraries, Dassault Systèmes can enhance the functionality of their software, offering clients more powerful tools for simulation and analysis. This not only strengthens their competitive edge but also aligns with the broader industry trend towards digital transformation and smart manufacturing (Chui et al., 2018). 
@@ -10,54 +10,40 @@ Moreover, the integration of advanced image processing capabilities into Dassaul % References -% - Gonzalez, R. C., & Woods, R. E. (2018). Digital Image Processing. Pearson. +% - Gonzalez, R. C., & Woods, R. E. (2008). Digital Image Processing. Pearson. +\cite{gonzalez_digital_2008-1} % - Szeliski, R. (2010). Computer Vision: Algorithms and Applications. Springer. +\cite{szeliski_introduction_2022} % - Chui, M., Manyika, J., & Miremadi, M. (2018). The Future of Work in America: People and Places, Today and Tomorrow. McKinsey Global Institute. -\subsection{Ubiquity of Image Processing Requirements} - -Image processing has evolved into a cornerstone technology across various industries, significantly impacting fields such as manufacturing, healthcare, security, and entertainment. Its ability to enhance, analyze, and manipulate images has led to innovations that streamline operations, improve accuracy, and enable new capabilities. Understanding the capabilities of different image processing libraries is crucial for optimizing performance and resource management, especially in environments with varying computational constraints. - -In manufacturing, image processing is pivotal for quality control and automation. Techniques such as edge detection, pattern recognition, and object classification are employed to inspect products for defects, ensuring high standards and reducing waste. For instance, in semiconductor manufacturing, image processing algorithms are used to detect microscopic defects on wafers, which is critical for maintaining the integrity of electronic components (Zhou et al., 2019). The ability to process images in real-time allows for immediate feedback and adjustments in the production line, enhancing efficiency and reducing downtime. - -Healthcare has also seen transformative changes due to image processing. 
Medical imaging technologies, such as MRI, CT scans, and X-rays, rely heavily on image processing to enhance image quality and assist in diagnosis. Advanced algorithms can detect anomalies in medical images, aiding radiologists in identifying diseases at earlier stages. For example, deep learning-based image processing techniques have been used to improve the accuracy of breast cancer detection in mammograms, significantly impacting patient outcomes (Litjens et al., 2017). - -The choice of image processing libraries is critical in both high-performance and resource-constrained environments. Libraries such as OpenCV, TensorFlow, and PyTorch offer a range of functionalities that cater to different needs. OpenCV, known for its speed and efficiency, is often used in real-time applications where quick processing is essential. TensorFlow and PyTorch, with their robust support for deep learning, are preferred for applications requiring complex neural network models. Understanding the strengths and limitations of these libraries allows developers to select the most appropriate tools for their specific use cases, balancing performance with resource availability. - -In resource-constrained environments, such as mobile devices or embedded systems, optimizing image processing tasks is crucial. Lightweight libraries and techniques, such as quantization and model pruning, can reduce computational load and power consumption without significantly compromising accuracy. This is particularly important in applications like mobile health monitoring, where devices must process images efficiently to provide timely feedback to users (Howard et al., 2017). - -% References - -% - Zhou, Y., Wang, Y., & Zhang, J. (2019). Defect detection in semiconductor manufacturing using image processing techniques. *Journal of Manufacturing Processes*, 45, 123-130. -% - Litjens, G., Kooi, T., Bejnordi, B. E., Setio, A. A. A., Ciompi, F., Ghafoorian, M., ... & van Ginneken, B. (2017). 
A survey on deep learning in medical image analysis. *Medical Image Analysis*, 42, 60-88. -% - Howard, A. G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., ... & Adam, H. (2017). MobileNets: Efficient convolutional neural networks for mobile vision applications. *arXiv preprint arXiv:1704.04861*. - \subsection{Hardware Considerations in Image Processing} The use of image processing libraries across different hardware platforms, such as powerful servers and embedded systems, presents a range of implications that are crucial for developers and engineers to consider. These implications are primarily centered around performance metrics like speed, memory usage, and power consumption, which significantly influence the choice of libraries for specific applications. -**Speed** is a critical performance metric in image processing, especially in applications requiring real-time processing, such as autonomous vehicles, surveillance systems, and augmented reality. On powerful servers, libraries like OpenCV and TensorFlow can leverage high computational power and parallel processing capabilities to deliver fast processing speeds. These libraries are optimized to take advantage of multi-core CPUs and GPUs, which are abundant in server environments. In contrast, embedded systems, which often have limited processing power, may require lightweight libraries such as CImg or SimpleCV that are optimized for speed on less powerful hardware. The choice of library, therefore, depends on the ability to meet the application's speed requirements within the constraints of the hardware. +\textbf{Speed} is a critical performance metric in image processing, especially in applications requiring real-time processing, such as autonomous vehicles, surveillance systems, and augmented reality. On powerful servers, libraries like OpenCV and TensorFlow can leverage high computational power and parallel processing capabilities to deliver fast processing speeds. 
These libraries are optimized to take advantage of multi-core CPUs and GPUs, which are abundant in server environments. In contrast, embedded systems, which often have limited processing power, may require lightweight libraries such as CImg or SimpleCV that are optimized for speed on less powerful hardware. The choice of library, therefore, depends on the ability to meet the application's speed requirements within the constraints of the hardware. -**Memory usage** is another crucial factor, particularly in embedded systems where memory resources are limited. Libraries that are memory-efficient are preferred in such environments to ensure that the system can handle image processing tasks without exhausting available memory. For instance, libraries like Halide are designed to optimize memory usage through techniques such as memory tiling and scheduling, making them suitable for memory-constrained environments. On the other hand, powerful servers with abundant memory resources can afford to use more memory-intensive libraries if they offer other advantages, such as ease of use or additional features. +\textbf{Memory usage} is another crucial factor, particularly in embedded systems where memory resources are limited. Libraries that are memory-efficient are preferred in such environments to ensure that the system can handle image processing tasks without exhausting available memory. For instance, libraries like Halide are designed to optimize memory usage through techniques such as memory tiling and scheduling, making them suitable for memory-constrained environments. On the other hand, powerful servers with abundant memory resources can afford to use more memory-intensive libraries if they offer other advantages, such as ease of use or additional features. -**Power consumption** is a significant consideration, especially in battery-powered embedded systems. 
High power consumption can lead to reduced battery life, which is undesirable in applications like mobile devices and remote sensors. Libraries that are optimized for low power consumption, such as those that minimize CPU usage or leverage specialized hardware accelerators, are preferred in these scenarios. For example, the use of hardware-specific libraries that utilize Digital Signal Processors (DSPs) or Graphics Processing Units (GPUs) can significantly reduce power consumption while maintaining performance. +\textbf{Power consumption} is a significant consideration, especially in battery-powered embedded systems. High power consumption can lead to reduced battery life, which is undesirable in applications like mobile devices and remote sensors. Libraries that are optimized for low power consumption, such as those that minimize CPU usage or leverage specialized hardware accelerators, are preferred in these scenarios. For example, the use of hardware-specific libraries that utilize Digital Signal Processors (DSPs) or Graphics Processing Units (GPUs) can significantly reduce power consumption while maintaining performance. Research has shown that hardware constraints are a significant factor in choosing image processing solutions. For instance, a study by [Smith et al. (2020)] demonstrated that the choice of image processing libraries for a drone-based surveillance system was heavily influenced by the need to balance processing speed and power consumption, leading to the selection of a library that could efficiently utilize the drone's onboard GPU. Similarly, [Jones and Patel (2019)] highlighted the importance of memory efficiency in selecting image processing libraries for a wearable health monitoring device, where limited memory resources necessitated the use of a highly optimized library. % References % - Smith, J., et al. (2020). "Optimizing Image Processing for Drone-Based Surveillance Systems." Journal of Embedded Systems, 15(3), 45-60. 
+ % - Jones, A., & Patel, R. (2019). "Memory-Efficient Image Processing for Wearable Health Monitoring Devices." International Journal of Computer Vision, 112(2), 123-137. + \subsection{Performance Metrics and Their Impact on Use Cases} Performance metrics such as latency, throughput, and resource efficiency are critical in determining the practical applications of image processing libraries. These metrics directly influence the feasibility, scalability, and cost-effectiveness of deploying image processing solutions across various industries, including those served by companies like Dassault Systèmes. -**Latency** refers to the time delay between the input of an image and the completion of its processing. In real-time applications, such as autonomous vehicles or live video surveillance, low latency is crucial. For instance, in autonomous driving, the system must process images from cameras in real-time to make immediate decisions. High latency could lead to delayed responses, potentially causing accidents. Research has shown that optimizing algorithms for lower latency can significantly enhance the performance of real-time systems (Zhang et al., 2020). +\textbf{Latency} refers to the time delay between the input of an image and the completion of its processing. In real-time applications, such as autonomous vehicles or live video surveillance, low latency is crucial. For instance, in autonomous driving, the system must process images from cameras in real-time to make immediate decisions. High latency could lead to delayed responses, potentially causing accidents. Research has shown that optimizing algorithms for lower latency can significantly enhance the performance of real-time systems (Zhang et al., 2020). -**Throughput** is the rate at which images are processed over a given period. High throughput is essential in applications like medical imaging, where large volumes of data need to be processed quickly to assist in diagnostics. 
For example, in radiology, the ability to process and analyze thousands of images rapidly can improve diagnostic accuracy and patient throughput in hospitals. Studies have demonstrated that optimizing image processing libraries for higher throughput can lead to more efficient healthcare delivery (Smith et al., 2019). +\textbf{Throughput} is the rate at which images are processed over a given period. High throughput is essential in applications like medical imaging, where large volumes of data need to be processed quickly to assist in diagnostics. For example, in radiology, the ability to process and analyze thousands of images rapidly can improve diagnostic accuracy and patient throughput in hospitals. Studies have demonstrated that optimizing image processing libraries for higher throughput can lead to more efficient healthcare delivery (Smith et al., 2019). -**Resource Efficiency** involves the optimal use of computational resources, such as CPU, GPU, and memory. Efficient resource utilization is vital for reducing operational costs and energy consumption, particularly in large-scale deployments. In industries like aerospace, where Dassault Systèmes operates, resource efficiency can lead to significant cost savings. For instance, in the design and simulation of aircraft components, efficient image processing can reduce the computational load, leading to faster design iterations and reduced time-to-market. Research indicates that resource-efficient algorithms can lead to substantial improvements in operational efficiency (Lee et al., 2021). +\textbf{Resource Efficiency} involves the optimal use of computational resources, such as CPU, GPU, and memory. Efficient resource utilization is vital for reducing operational costs and energy consumption, particularly in large-scale deployments. In industries like aerospace, where Dassault Systèmes operates, resource efficiency can lead to significant cost savings. 
For instance, in the design and simulation of aircraft components, efficient image processing can reduce the computational load, leading to faster design iterations and reduced time-to-market. Research indicates that resource-efficient algorithms can lead to substantial improvements in operational efficiency (Lee et al., 2021). In the context of Dassault Systèmes, these performance metrics are particularly relevant. The company provides 3D design, 3D digital mock-up, and product lifecycle management (PLM) software. In these applications, image processing is used extensively for rendering 3D models, simulating real-world scenarios, and visualizing complex data. For example, in the automotive industry, Dassault Systèmes' solutions are used to design and test vehicles virtually. Here, low latency and high throughput are crucial for real-time simulations and analyses, while resource efficiency ensures that these processes are cost-effective and sustainable. @@ -67,25 +53,4 @@ Moreover, Dassault Systèmes' involvement in smart city projects requires effici % - Zhang, Y., Wang, X., & Li, J. (2020). Real-time image processing in autonomous vehicles: A survey. *Journal of Real-Time Image Processing*, 17(3), 567-589. % - Smith, A., Jones, B., & Patel, C. (2019). High-throughput medical imaging: Challenges and solutions. *Medical Image Analysis*, 58, 101-112. -% - Lee, H., Kim, S., & Park, J. (2021). Resource-efficient algorithms for large-scale image processing. *IEEE Transactions on Image Processing*, 30, 1234-1245. - -\subsection{Specific Use Cases at Dassault Systems} - -Dassault Systèmes, a leader in 3D design and engineering software, integrates image processing libraries into its products to enhance functionality and address unique challenges in product design, simulation, and quality assurance. While specific proprietary details are confidential, general industry practices provide insight into how these integrations can be beneficial. 
- -In product design, image processing libraries are crucial for converting real-world images into digital models. This process, known as photogrammetry, allows designers to create accurate 3D models from photographs. By integrating image processing libraries, Dassault Systèmes' software can automate the conversion of 2D images into 3D models, significantly reducing the time and effort required for manual modeling. This capability is particularly useful in industries such as automotive and aerospace, where precision and accuracy are paramount (Remondino \& El-Hakim, 2006). - -In simulation, image processing libraries enhance the visualization and analysis of complex data. For instance, in finite element analysis (FEA), these libraries can process and visualize stress distribution images, helping engineers identify potential failure points in a design. By providing clear, detailed visualizations, image processing tools enable engineers to make informed decisions about material selection and structural modifications, ultimately improving product safety and performance (Bathe, 2006). - -Quality assurance is another area where image processing libraries play a vital role. Automated inspection systems use these libraries to analyze images of manufactured parts, identifying defects such as cracks, misalignments, or surface irregularities. By integrating image processing capabilities, Dassault Systèmes' solutions can offer real-time quality control, reducing the need for manual inspections and minimizing the risk of defective products reaching the market. This approach is widely used in manufacturing industries to ensure high standards of product quality and consistency (Szeliski, 2010). - -Furthermore, image processing libraries facilitate the integration of augmented reality (AR) and virtual reality (VR) technologies into Dassault Systèmes' products. 
These technologies rely heavily on image processing to overlay digital information onto the real world or create immersive virtual environments. In product design and simulation, AR and VR can provide interactive, 3D visualizations of products, allowing designers and engineers to explore and refine their creations in a virtual space before physical prototypes are built (Azuma, 1997). - -In conclusion, the integration of image processing libraries into Dassault Systèmes' products enhances functionality across various stages of product development. By automating model creation, improving data visualization, ensuring quality assurance, and enabling AR/VR applications, these libraries address unique challenges in design, simulation, and manufacturing. While specific implementations within Dassault Systèmes remain confidential, the general industry applications underscore the transformative impact of image processing technologies in engineering and design. - -% References - -% - Remondino, F., & El-Hakim, S. (2006). Image-based 3D modelling: A review. *The Photogrammetric Record*, 21(115), 269-291. -% - Bathe, K. J. (2006). *Finite Element Procedures*. Prentice Hall. -% - Szeliski, R. (2010). *Computer Vision: Algorithms and Applications*. Springer. -% - Azuma, R. T. (1997). A survey of augmented reality. *Presence: Teleoperators & Virtual Environments*, 6(4), 355-385. \ No newline at end of file +% - Lee, H., Kim, S., & Park, J. (2021). Resource-efficient algorithms for large-scale image processing. *IEEE Transactions on Image Processing*, 30, 1234-1245. 
\ No newline at end of file diff --git a/sections/Chapter-1-sections/Research-Questions.tex b/sections/Chapter-1-sections/Research-Questions.tex index d30ef7a57b9c7b5f0e5fad0f922edb300de6b040..2ecad9e27f6eaa18c61950a7da00f1775525823b 100644 --- a/sections/Chapter-1-sections/Research-Questions.tex +++ b/sections/Chapter-1-sections/Research-Questions.tex @@ -1,6 +1,6 @@ \section{ Research Questions and Investigative Focus} -In This section we examine the core questions that guided the research in this master thesis. Rather than adopting a traditional hypothesis-driven approach, the study focused on a systematic, empirical evaluation of image processing libraries. The investigation was centered on two main questions: +In this section, the core questions that guided the research in this master thesis are examined. Rather than adopting a traditional hypothesis-driven approach, the study focused on a systematic, empirical evaluation of image processing libraries. The investigation was centered on two main questions: \begin{enumerate} \item What is the performance of different libraries when executing a defined set of image processing tasks? diff --git a/sections/Chapter-3-sections/Image-Conversion.tex b/sections/Chapter-3-sections/Image-Conversion.tex index f46035d24098e08b89ca542e5c1fa68d8fd7790f..76a513d0831d6e9fa5b79021d64d648df2321a6c 100644 --- a/sections/Chapter-3-sections/Image-Conversion.tex +++ b/sections/Chapter-3-sections/Image-Conversion.tex @@ -60,6 +60,8 @@ public class ImageConversionBenchmark } \end{lstlisting} -In the code, the warm-up phase runs for five iterations. Each iteration loads the image, saves it as a PNG, and then accumulates the elapsed time. After the warm-up, the main test performs 100 iterations of the same operation, allowing us to compute an average execution time. 
The rationale behind this design is to isolate the steady-state performance from any one-time overhead, ensuring that the reported metrics reflect the true operational cost of image conversion. +In the implementation, the warm-up phase executes for five iterations, during which the image is loaded, converted to PNG format, and the elapsed time is accumulated. Subsequently, the main benchmark conducts 100 iterations of the identical operation, enabling the calculation of a statistically significant average execution time. This methodological approach ensures the isolation of steady-state performance from initialization overhead, providing metrics that accurately reflect the operational cost of image conversion. -The story behind this implementation is one of iterative refinement. Early tests revealed that the initial iterations were significantly slower, prompting the introduction of the warm-up phase. Over time, it has been refined the benchmarking routine to ensure that every iteration is as isolated as possible, thereby reducing the influence of transient system states. \ No newline at end of file +This benchmark design evolved through iterative refinement. Initial experimental observations revealed significantly higher latency during the first iterations, necessitating the introduction of a dedicated warm-up phase. The benchmark methodology has been progressively optimized to ensure maximum isolation between iterations, thereby minimizing the influence of transient system states on measurement accuracy. + +While this benchmarking approach is primarily analytical in nature, it has implications for practical applications as well. For persistent server applications or batch processing systems, the steady-state performance metrics post-warm-up represent the most relevant operational characteristics.
Conversely, for interactive applications, command-line utilities, or serverless computing environments, the initial performance represented by the warm-up phase may be more indicative of user-perceived responsiveness, as these contexts typically experience the full initialization cost with each invocation. \ No newline at end of file diff --git a/sections/Chapter-3-sections/Libraries-Implementation.tex b/sections/Chapter-3-sections/Libraries-Implementation.tex index 2b3778687ee04e98c66b27d22a949ec7472e0e77..20486a446c5f123be81f3bbe74538ff27c6cb004 100644 --- a/sections/Chapter-3-sections/Libraries-Implementation.tex +++ b/sections/Chapter-3-sections/Libraries-Implementation.tex @@ -1,5 +1,5 @@ \section{Libraries Implementation} -As discussed in the Methodology chapter, a comprehensive evaluation was undertaken to assess the strengths and limitations of various image processing libraries. This analysis informed the decision to implement integrations for frameworks: OpenCvSharp with SkiaSharp, and Emgu CV with Structure.Sketching, and Magick.NET with MagicScaler. The following excerpt presents representative code segments that illustrate the implementation strategies developed for these libraries. These segments not only capture the theoretical rationale behind each implementation approach but also reflect the practical constraints and performance considerations addressed throughout the thesis. This compilation of code serves as a testament to the systematic, experimental, and iterative nature of the research, highlighting the rigorous engineering process that underpinned the development of a robust image processing benchmarking framework. +As discussed in the Methodology chapter, a comprehensive evaluation was undertaken to assess the strengths and limitations of various image processing libraries. 
This analysis informed the decision to implement integrations for frameworks: OpenCvSharp with SkiaSharp, and Emgu CV with Structure.Sketching, and Magick.NET with MagicScaler. The following excerpt presents representative code segments that illustrate the implementation strategies developed for these libraries. These segments not only capture the theoretical rationale behind each implementation approach but also reflect the practical constraints and performance considerations addressed throughout the thesis. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/sections/Chapter-3-sections/Memory-Profiling.tex b/sections/Chapter-3-sections/Memory-Profiling.tex index d5f241200bcf2d46e8e18d960e9bbdc77379dc90..305af4ebcd35e797cdaa3caddcc15c4728a581b8 100644 --- a/sections/Chapter-3-sections/Memory-Profiling.tex +++ b/sections/Chapter-3-sections/Memory-Profiling.tex @@ -1,6 +1,6 @@ \section{Memory Profiling and Performance Analysis} -In any high-performance image processing application, it is not enough to measure raw execution time; memory consumption is equally critical. This section describes the integration of memory profiling into the benchmarking framework to provide a comprehensive view of the performance characteristics of each library and complement the time-based measurements. Using BenchmarkDotNet—a powerful tool for .NET performance analysis—we captured detailed metrics on memory allocation and garbage collection behavior. This implementation allowed us to understand the trade-offs between processing speed and resource utilization. +In any high-performance image processing application, it is not enough to measure raw execution time; memory consumption is equally critical. This section describes the integration of memory profiling into the benchmarking framework to provide a comprehensive view of the performance characteristics of each library and complement the time-based measurements. 
Using BenchmarkDotNet—a powerful tool for .NET performance analysis—detailed metrics on memory allocation and garbage collection behavior were captured. This implementation allowed the trade-offs between processing speed and resource utilization to be better understood. The memory profiling is designed to evaluate not only the mean execution times but also the memory allocated during both image conversion and pixel iteration tasks. Using BenchmarkDotNet’s \texttt{[MemoryDiagnoser]}, \texttt{[Orderer]}, and \texttt{[RankColumn]} attributes, data on memory consumption, garbage collection events, and total allocated memory were collected for each benchmarked operation. The BenchmarkDotNet analyzer for each method by default is configured to automatically determine how many warmup and measurement iterations to run based on the workload, environment, and statistical requirements for accurate measurements. So there is no need to implement a fixed iteration count for each method manually. @@ -61,8 +61,8 @@ public void PixelIterationBenchmark() } \end{lstlisting} -The pixel iteration benchmark was implemented in a similar manner, with the same memory diagnostics attributes. The code snippet above demonstrates the pixel iteration benchmark for ImageSharp, where each pixel in the image is converted to grayscale. The memory diagnostics provided by BenchmarkDotNet allowed us to track the memory consumption and garbage collection events during the pixel iteration operation, providing valuable insights into the resource utilization of each library. +The pixel iteration benchmark was implemented in a similar manner with the same memory diagnostics attributes. The code snippet above demonstrates the pixel iteration benchmark for ImageSharp, where each pixel in the image is converted to grayscale. 
The memory diagnostics provided by BenchmarkDotNet enabled tracking of the memory consumption and garbage collection events during the pixel iteration operation, providing valuable insights into the resource utilization of each library. -This code exemplifies our approach to memory diagnostics. By annotating the benchmark class with \texttt{[MemoryDiagnoser]}, BenchmarkDotNet automatically collects data on memory usage—including the number of garbage collection (GC) events and the total allocated memory during each benchmarked operation. Similar implimentations were done for other libraries as well. +This code exemplifies the approach to memory diagnostics. By annotating the benchmark class with \texttt{[MemoryDiagnoser]}, BenchmarkDotNet automatically collects data on memory usage—including the number of garbage collection (GC) events and the total allocated memory during each benchmarked operation. Similar implementations were done for other libraries as well. This level of granularity provided insights that went beyond raw timing metrics, revealing, for example, that while Emgu CV might be faster in certain operations, its higher memory consumption could be a concern for applications running on memory-constrained systems. \ No newline at end of file diff --git a/sections/Chapter-3-sections/Pixel-Iteration.tex b/sections/Chapter-3-sections/Pixel-Iteration.tex index 75155d9e94c341fdcfef3e1a80d1cb2bc8d9f81d..db07947b45b7f6765f3c61b5b1ec2efbbe680756 100644 --- a/sections/Chapter-3-sections/Pixel-Iteration.tex +++ b/sections/Chapter-3-sections/Pixel-Iteration.tex @@ -1,6 +1,6 @@ \subsection{Pixel Iteration Benchmark Implementation} -The pixel iteration benchmark is equally critical, as it measures the time taken to perform a basic image processing operation—converting an image to grayscale by iterating over each pixel. This benchmark simulates real-world scenarios where complex filters and effects require individual pixel manipulation. 
+The pixel iteration benchmark measures the time taken to perform a basic image processing operation—converting an image to grayscale by iterating over each pixel. While modern image processing often employs vectorized operations on entire matrices for efficiency, pixel-by-pixel iteration remains relevant in several scenarios: when implementing custom filters with complex logic, when working with specialized pixel formats, or when memory constraints limit bulk operations. Additionally, this benchmark provides insight into the underlying performance characteristics of image libraries even if vectorized alternatives would be preferred in production environments. By examining the performance of this fundamental operation, a better understanding of the efficiency trade-offs in various image processing contexts is achieved. For ImageSharp, the implementation involves loading the image as an array of pixels, processing each pixel to compute its grayscale value, and then updating the image accordingly. The following snippet provides a glimpse into this process: diff --git a/sections/Chapter-3-sections/System-Architecture.tex b/sections/Chapter-3-sections/System-Architecture.tex index da12af1d682bc60fad8588d9b6b218c166984cbb..87c815e075e57e77bfeca65e86fc04290787cb3e 100644 --- a/sections/Chapter-3-sections/System-Architecture.tex +++ b/sections/Chapter-3-sections/System-Architecture.tex @@ -1,8 +1,7 @@ \section{System Architecture and Design Rationale} +The design of the benchmarking framework was guided by the need for consistency, repeatability, and scientific rigor. The system was architected to support multiple libraries through a common interface, ensuring that each library’s performance could be measured under identical conditions. At the core of the design was a two-phase benchmarking process: an initial warm-up phase to account for any initialization overhead, followed by a main test phase where the actual performance metrics were recorded.
-The design of our benchmarking framework was guided by the need for consistency, repeatability, and scientific severity. The system was architected to support multiple libraries through a common interface, ensuring that each library’s performance could be measured under identical conditions. At the core of our design was a twoâ€phase benchmarking process: an initial warm-up phase to account for any initialization overhead, followed by a main test phase where the actual performance metrics were recorded. - -In constructing the system, several important decisions were made. First, we employed a modular approach, separating the benchmarking routines into distinct components. This allowed us to encapsulate the logic for image conversion and pixel iteration into separate classes, each responsible for executing a series of timed iterations and logging the results. +In constructing the system, several important decisions were made. First, a modular approach was employed, separating the benchmarking routines into distinct components. This allowed the logic for image conversion and pixel iteration to be encapsulated into separate classes, each responsible for executing a series of timed iterations and logging the results. \begin{lstlisting}[language={[Sharp]C}, caption={Design of the benchmarking framework}] public class ImageConversionBenchmark{ @@ -26,7 +25,7 @@ The architecture also included a dedicated component for result aggregation, whi } \end{lstlisting} -An essential aspect of the design was the uniformity of testing. Despite the differences in methods of implementation among the libraries, the benchmarking framework was designed to abstract away these differences. Each library was integrated by implementing the same sequence of operations: reading an image from disk, processing the image (either converting its format or iterating over its pixels to apply a grayscale filter), and finally saving the processed image back to disk. 
This uniform methodology ensured that our performance comparisons were both fair and reproducible. +An essential aspect of the design was the uniformity of testing. Despite the differences in methods of implementation among the libraries, the benchmarking framework was designed to abstract away these differences. Each library was integrated by implementing the same sequence of operations: reading an image from disk, processing the image (either converting its format or iterating over its pixels to apply a grayscale filter), and finally saving the processed image back to disk. This uniform methodology ensured that the performance comparisons were both fair and reproducible. The architecture also accounted for system-level factors such as memory management and garbage collection. For instance, in languages like C\#, where unmanaged resources must be explicitly disposed of, the design included rigorous cleanup routines to ensure that each iteration began with a clean slate. This attention to detail was crucial in obtaining accurate measurements, as any residual state from previous iterations could skew the results. diff --git a/sections/Chapter-4-sections/Analysis_and_Interpretation_of_Results.tex b/sections/Chapter-4-sections/Analysis_and_Interpretation_of_Results.tex index d893d9cf32c6401530449c375c702d461bf8dc57..2f023628fd7127dceaf5d967d464581a4df97341 100644 --- a/sections/Chapter-4-sections/Analysis_and_Interpretation_of_Results.tex +++ b/sections/Chapter-4-sections/Analysis_and_Interpretation_of_Results.tex @@ -1,25 +1,26 @@ \section{Analysis and Interpretation of Results} -As the final benchmarking results were collected and plotted, the emerging trends provided critical insights into the efficiency of various image processing libraries. 
The raw numerical data from our benchmarking suite provided an answer to the research question, but a deeper interpretation of these results allowed us to refine our understanding of the trade-offs and strengths of each alternative. This section explores the relationship between speed and memory usage, compares the empirical findings with theoretical expectations, and discusses the implications for real-world applications. +As the final benchmarking results were collected and plotted, the emerging trends provided critical insights into the efficiency of various image processing libraries. The raw numerical data from the benchmarking suite provided an answer to the research question, but a deeper interpretation of these results allowed refinement of the understanding of the trade-offs and strengths of each alternative. This section explores the relationship between speed and memory usage, compares the empirical findings with theoretical expectations, and discusses the implications for real-world applications. \subsection{Comparison of Performance Trends} -The performance hierarchy observed in the benchmarking results closely aligns with expectations based on each library’s internal architecture. Libraries such as OpenCvSharp and Emgu CV, both built upon OpenCV’s optimized C++ backend, showcased superior execution times for image conversion tasks. This efficiency is largely attributed to OpenCV’s reliance on low-level SIMD (Single Instruction, Multiple Data) optimizations and hardware-accelerated processing paths. +The performance hierarchy observed in the benchmarking results closely aligns with expectations based on each library’s internal architecture. Libraries such as OpenCvSharp and Emgu CV, both built upon OpenCV’s optimized C++ backend, showcased superior execution times for pixel iteration tasks. 
This efficiency is largely attributed to OpenCV’s reliance on low-level SIMD (Single Instruction, Multiple Data) optimizations and hardware-accelerated processing paths. -Conversely, ImageSharp—despite its clean API and pure C\# implementation—demonstrated significantly higher processing times, reinforcing the general principle that managed code introduces overhead compared to native libraries. While ImageSharp remains a viable option for applications prioritizing ease of use and portability over raw performance, the performance disparity is undeniable. +Conversely, ImageSharp—despite its clean API and pure C\# implementation—demonstrated significantly higher processing times, reinforcing the general principle that managed code introduces overhead compared to native libraries. In memory-constrained environments, the trade-off between speed and memory usage should be carefully considered as ImageSharp’s memory efficiency may outweigh its slower execution times. ImageSharp remains a viable option for applications prioritizing ease of use and portability over raw performance or in scenarios where memory conservation is critical. -Magick.NET, though powerful and highly flexible in terms of format support, performed noticeably worse in pixel iteration tasks. This result was somewhat anticipated due to the internal structure of ImageMagick, which prioritizes format conversions and high-quality rendering over raw pixel access speed. The excessive processing times observed in the Magick.NET pixel iteration benchmark further support the hypothesis that it is not optimized for this type of operation. +Magick.NET, though powerful and highly flexible in terms of format support, performed noticeably worse in pixel iteration tasks. This result was somewhat anticipated due to the internal structure of ImageMagick, which prioritizes format conversions and high-quality rendering over raw pixel access speed. 
The excessive processing times observed in the Magick.NET pixel iteration benchmark further support the hypothesis that it is not optimized for this type of operation. However, its range of features and extensive format support make it a compelling choice for applications requiring advanced image processing capabilities. -The trends in memory consumption were particularly revealing. While OpenCvSharp + SkiaSharp exhibited minimal memory allocation, Emgu CV+Structure.Sketching, despite its processing speed, required substantially higher memory overhead. This observation is consistent with Emgu CV’s underlying OpenCV core, which relies on large temporary buffers and matrix structures for intermediate computations. In contrast, ImageSharp demonstrated exceptional memory efficiency during pixel iteration but was significantly slower, suggesting that its architecture prioritizes memory conservation over execution speed. +The trends in memory consumption were particularly revealing. In the image conversion test, SkiaSharp exhibited the lowest memory usage, also demonstrating competitive processing times. This result is consistent with SkiaSharp’s reputation for being lightweight and efficient, making it an excellent choice for applications that need high performance and low memory overhead. In the pixel iteration test, Emgu CV’s memory usage was significantly higher than ImageSharp’s, highlighting the trade-off between speed and memory efficiency. This finding underscores the importance of selecting the right library based on the specific requirements of the application. This observation is consistent with Emgu CV’s underlying OpenCV core, which relies on large temporary buffers and matrix structures for intermediate computations. In contrast, ImageSharp demonstrated exceptional memory efficiency during pixel iteration but was significantly slower, suggesting that its architecture prioritizes memory conservation over execution speed.
\subsection{Trade-Offs Between Speed and Memory Usage} -The relationship between speed and memory consumption is a recurring theme in performance optimization. Our results underscore that achieving optimal speed often comes at the cost of increased memory usage. Emgu CV+Structure.Sketching exemplifies this trade-off: while its pixel iteration speed was among the best recorded, it consumed significantly more RAM than ImageSharp. +The relationship between speed and memory consumption is a recurring theme in performance optimization. Results underscore that achieving optimal speed often comes at the cost of increased memory usage. Emgu CV+Structure.Sketching exemplifies this trade-off: while its pixel iteration speed was among the best recorded, it consumed significantly more RAM than ImageSharp. The implications of these trade-offs depend heavily on the intended application. For environments where processing speed is paramount—such as real-time video processing or AI-powered image enhancement—Emgu CV’s increased memory footprint may be an acceptable compromise. However, in resource-constrained applications (e.g., embedded systems, mobile devices, or cloud-based deployments with strict memory limits), a lower-memory alternative like ImageSharp may be more suitable despite its lower speed. 
+\renewcommand{\arraystretch}{1.5} \setlength{\columnWidth}{0.10\textwidth} -\begin{longtable}{|>{\raggedright\arraybackslash}p{0.26\textwidth}|>{\raggedright\arraybackslash}p{0.43\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} +\begin{longtable}{|>{\raggedright\arraybackslash}p{0.26\textwidth}|>{\raggedright\arraybackslash}p{0.23\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} \hline \rowcolor{purple!30} \textbf{Library} & \textbf{Task} & \textbf{Speed} & \textbf{Memory Usage} \\ @@ -36,12 +37,14 @@ The implications of these trade-offs depend heavily on the intended application. \cline{2-4} & Pixel Iteration & Fast & Low \\ \hline -\multirow{2}{*}{\shortstack{\textbf{Emgu CV + SkiaSharp}}} & Image Conversion (SkiaSharp) & Fast & Low \\ -\cline{2-4} - & Pixel Iteration (Emgu CV) & Fast & High \\ +\textbf{Emgu CV} & Pixel Iteration & Fast & High \\ +\hline +\textbf{SkiaSharp} & Image Conversion & Fast & Low \\ \hline -\caption{Speed vs. Memory Usage Trade-Offs} + +\caption{Speed and Memory Trade-Offs for Image Processing Libraries; the fast/slow and high/low ratings are relative to the other libraries.} \label{tab:speed-memory-trade-offs} \end{longtable} +\renewcommand{\arraystretch}{1.0} One particularly interesting finding was that OpenCvSharp+SkiaSharp consistently delivered both high speed and low memory usage for image conversion. This anomaly suggests that this combination strikes an optimal balance, leveraging OpenCV’s native optimizations while maintaining a lightweight footprint in memory. The fact that this hybrid approach outperformed even standalone OpenCV libraries further supports the notion that combining high-performance native libraries with efficient rendering engines can yield superior results.
\ No newline at end of file diff --git a/sections/Chapter-4-sections/Image_conversion_benchmark_results.tex b/sections/Chapter-4-sections/Image_conversion_benchmark_results.tex index a8d3df3d0b772f133d6691eece89dbb25e52b6c8..e568446721c5f93e7994ad1fece5de68ebf7266b 100644 --- a/sections/Chapter-4-sections/Image_conversion_benchmark_results.tex +++ b/sections/Chapter-4-sections/Image_conversion_benchmark_results.tex @@ -1,13 +1,14 @@ \section{Image Conversion Benchmark Results} -The image conversion benchmark was performed using ImageSharp and Magick.NET as well as SkiaSharp and Structure.Sketching which were the chosen libraries in their combinations with OpenCvSharp and Emgu CV, respectively for the conversion task. Using the same 4k resolution image, the benchmark measured the time taken to convert the image from JPEG to PNG format. Comparing the results of these libraries provides insights into their performance and efficiency in application scenarios where rapid image conversion is required—such as real-time image processing pipelines or high-volume batch processing environments. The data thus answer one of our central question to which library can provide significantly faster image conversion, thereby supporting the hypothesis discussed in earlier chapters. +The image conversion benchmark was performed using ImageSharp and Magick.NET as well as SkiaSharp and Structure.Sketching which were the chosen libraries in their combinations with OpenCvSharp and Emgu CV, respectively, for the conversion task. Using the same 4k resolution image, the benchmark measured the time taken to convert the image from JPEG to PNG format. Comparing the results of these libraries provides insights into their performance and efficiency in application scenarios where rapid image conversion is required—such as real-time image processing pipelines or high-volume batch processing environments. 
The data thus answer one of the central questions regarding which library can provide significantly faster image conversion, thereby supporting the hypothesis discussed in earlier chapters. + +ImageSharp recorded an average conversion time of approximately 2,754 milliseconds. In contrast, the combination of OpenCvSharp with SkiaSharp delivered an average conversion time of only 539 milliseconds. Similarly, Emgu CV integrated with Structure.Sketching achieved an average time of 490 milliseconds, while Magick.NET registered an average conversion time of 4,333 milliseconds. -ImageSharp recorded an average conversion time of approximately 2,754 milliseconds. In contrast, the combination of OpenCvSharp with SkiaSharp delivered an average conversion time of only 539 milliseconds. Similarly, Emgu CV integrated with Structure.Sketching achieved an average time of 490 milliseconds, while Magick.NET paired registered an average conversion time of 4,333 milliseconds. \newlength{\columnWidth} \setlength{\columnWidth}{0.19\textwidth} -\vspace{0.5cm} -\begin{longtable}{|>{\raggedright\arraybackslash}p{0.40\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} +\renewcommand{\arraystretch}{2} +\begin{longtable}{|>{\raggedright\arraybackslash}p{0.25\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} \hline \rowcolor{purple!30} \textbf{Library} & \textbf{Warm-Up Time (ms)} & \textbf{Avg. Time Excl. Warm-Up (ms)} & \textbf{Total Time Incl. 
Warm-Up (ms)} \\ @@ -25,18 +26,19 @@ ImageSharp recorded an average conversion time of approximately 2,754 millisecon \textbf{Magick.NET} & 4333 & 845.46 & 88879 \\\hline \textbf{Emgu CV + Structure.Sketching} & 490 & 59.43 & 6433 \\\hline -\caption{Image Conversion Benchmark Results} +\caption{The Image Conversion Benchmark Results in milliseconds, showing the warm-up time, average time excluding warm-up, and total time including warm-up for each library or combination.} \label{tab:image-conversion-results} \end{longtable} +\renewcommand{\arraystretch}{1.0} -The table above, is the final dataset that been constructed by merging multiple Excel files produced by the framework described in the Implementation chapter. These results shows lightweight libraries such as SkiaSharp and Structure.Sketching outperforming ImageSharp and Magick.NET in terms of image conversion time. The data also reveals that Emgu CV with Structure.Sketching is the most efficient combination for image conversion, with the lowest average time of 490 milliseconds. but on the other hand, ImageSharp and Magick.NET are significantly slower, with average times of 2,754 and 4,333 milliseconds, respectively. - - -\includegraphics[width=5in]{media/log_1.png} -\captionof{figure}{Performance Comparison - Image Conversion (log scale)} -\label{fig:image-conversion} -\vspace{0.5cm} - +The table \ref{tab:image-conversion-results}, is the final dataset that been constructed by merging multiple Excel files produced by the framework described in the Implementation chapter. These results shows lightweight libraries such as SkiaSharp and Structure.Sketching outperforming ImageSharp and Magick.NET in terms of image conversion time. The data also reveals that Emgu CV with Structure.Sketching is the most efficient combination for image conversion, with the lowest average time of 490 milliseconds. 
On the other hand, ImageSharp and Magick.NET are significantly slower, with average times of 2,754 and 4,333 milliseconds, respectively. -To visually encapsulate these findings, The graph illustrate the conversion times across the tested libraries, which clearly shows that the conversion times for OpenCvSharp+SkiaSharp and Emgu CV+Structure.Sketching are clustered at the lower end of the spectrum, while ImageSharp’s results are significantly higher. This visual evidence reinforces the numerical data and provides an immediate, intuitive understanding of the performance differences. And the log scale was used to better represent the data, as the differences between total time which is the sum of warm-up and average time, are significant. This three color graph can aid in comparing the performance of the libraries in different scenarios, such as real-time image processing or batch conversion tasks in one glance. +To visually summarize these findings, Figure \ref{fig:image-conversion} presents a bar chart that depicts the conversion times across the evaluated libraries. The graph clearly demonstrates that the conversion times for the OpenCvSharp+SkiaSharp and Emgu CV+Structure.Sketching combinations are positioned at the lower end of the performance spectrum, while ImageSharp exhibits considerably higher times. A logarithmic scale has been employed to effectively represent the significant differences in total times—comprising both the warm-up periods and the average conversion times. This three-color graphical representation enables a thorough comparison of library performance in various contexts, such as real-time image processing and batch conversion tasks, thereby reinforcing the quantitative analysis presented earlier. +\begin{center} + \includegraphics[width=5in]{media/log_1.png} + \captionof{figure}{Bar chart showing the Image Conversion Benchmark Results in milliseconds, with a logarithmic scale to highlight the differences in total times. 
X-axis represents the libraries or combinations, while Y-axis shows the time in milliseconds.} + \label{fig:image-conversion} + \vspace{0.5cm} +\end{center} +\vspace{-3em} diff --git a/sections/Chapter-4-sections/Memory_benchmark_results.tex b/sections/Chapter-4-sections/Memory_benchmark_results.tex index 55d967a32becc9b05daadd78a9922d284066d016..3cebf047af104709340ec2dbb5f2eabb0f087a03 100644 --- a/sections/Chapter-4-sections/Memory_benchmark_results.tex +++ b/sections/Chapter-4-sections/Memory_benchmark_results.tex @@ -1,6 +1,6 @@ \section{Memory Benchmarking Results} -In parallel with the time benchmarks, memory consumption was a critical parameter in our evaluation. For the image conversion tasks, SkiaSharp, as part of the OpenCvSharp+SkiaSharp configuration, exhibited the lowest memory allocation, with values approximating 58 KB. ImageSharp, in comparison, required about 5.67 MB, which is substantially higher. In the context of pixel iteration, the memory profiles were similarly divergent. ImageSharp was extremely efficient in this regard, consuming roughly 20 KB on average, whereas Emgu CV + Structure.Sketching, despite its fast processing times, utilized around 170 MB of memory. +In parallel with the time benchmarks, memory consumption was a critical parameter in the evaluation. For the image conversion tasks, SkiaSharp, as part of the OpenCvSharp+SkiaSharp configuration, exhibited the lowest memory allocation, with values approximating 58 KB. ImageSharp, in comparison, required about 5.67 MB, which is substantially higher. In the context of pixel iteration, the memory profiles were similarly divergent. ImageSharp was extremely efficient in this regard, consuming roughly 20 KB on average, whereas Emgu CV + Structure.Sketching, that performed exceptionally well in terms of speed for pixel iteration, in memory terms, was less efficient. It consumed around 170 MB of memory, which is significantly higher than the other libraries tested. 
\setlength{\columnWidth}{0.22\textwidth} \begin{longtable}{|>{\raggedright\arraybackslash}p{0.20\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} @@ -24,13 +24,13 @@ In parallel with the time benchmarks, memory consumption was a critical paramete \textbf{SkiaSharp} & 0.05612 MB (58,864 bytes) & - / - / - \\ \hline -\caption{Memory Benchmarking Results for Image Conversion} +\caption{Memory benchmarking results for the image conversion task, detailing the allocated memory (in MB) along with the associated Gen0, Gen1, and Gen2 garbage collection counts.} \label{tab:memory-results-image-conversion} \end{longtable} -The table above summarizes the memory benchmarking results for image conversion. It is evident that ImageSharp has the highest memory allocation, with approximately 5.67 MB, while SkiaSharp has the lowest, with only 58 KB. Emgu CV falls in between, with a memory allocation of 0.00068 MB. These figures provide a clear indication of the memory efficiency of each library for image conversion tasks.\\ +Table~\ref{tab:memory-results-image-conversion} summarizes the memory benchmarking results for image conversion. It is evident that ImageSharp has the highest memory allocation, with approximately 5.67 MB, while SkiaSharp has the lowest. Emgu CV falls in between, with a memory allocation of 0.00068 MB. These figures provide a clear indication of the memory efficiency of each library for image conversion tasks. Garbage collection counts are also included to provide additional context on the memory management behavior of each library. The Gen0, Gen1, and Gen2 collection counts indicate the number of times each generation was collected during the benchmarking process. These metrics are essential for understanding how each library manages memory and how it impacts performance.
-The large memory footprint of Emgu CV during pixel iteration is a noteworthy trade-off. While its performance in terms of speed is excellent, the high memory consumption must be considered when deploying the solution in memory-constrained environments. The benchmarking data collected here is critical because it provides a balanced view—speed alone does not define an optimal library, but rather the ratio of processing time to memory usage does. For a clear summary of these findings, the below table provides a concise overview of the memory metrics for each library configuration. +The large memory footprint of Emgu CV during pixel iteration is a noteworthy trade-off. While its performance in terms of speed is excellent, the high memory consumption must be considered when deploying the solution in memory-constrained environments. The benchmarking data collected here is critical because it provides a balanced view—speed alone does not define an optimal library, but rather the ratio of processing time to memory usage does. For a clear summary of these findings, Table~\ref{tab:memory-results-pixel-iteration} provides a concise overview of the memory metrics for each library configuration.
\begin{longtable}{|>{\raggedright\arraybackslash}p{0.20\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} @@ -54,9 +54,8 @@ The large memory footprint of Emgu CV during pixel iteration is a noteworthy tra \textbf{SkiaSharp} & 384.00 MB (403,300,552 bytes) & 85 / - / - \\ \hline -\caption{Memory Benchmarking Results for Pixel Iteration} +\caption{Memory Benchmarking Results for Pixel Iteration Task, detailing the allocated memory (in MB) along with the associated Gen0, Gen1, and Gen2 garbage collection counts.} \label{tab:memory-results-pixel-iteration} \end{longtable} - -The table indicates that while Emgu CV+Structure.Sketching is extremely fast for pixel iteration, its memory consumption is substantially higher compared to ImageSharp and the OpenCvSharp+SkiaSharp combination. Emgu CV has the highest memory allocation, with approximately 170 MB, while ImageSharp has the lowest, with only 20 KB. SkiaSharp falls in between, with a memory allocation of 384 MB. These figures provide a clear indication of the memory efficiency of each library for pixel iteration tasks. Such data are instrumental in shaping our final recommendation. +Table~\ref{tab:memory-results-pixel-iteration} indicates that while SkiaSharp has the highest memory allocation for pixel iteration of approximately 384 MB, ImageSharp is the most memory-efficient, with a memory allocation of 0.01932 MB. Emgu CV falls in between, with a memory allocation of 170 MB. These figures provide a clear indication of the memory efficiency of each library for pixel iteration tasks. Garbage collection counts are also included to provide additional context on the memory management behavior of each library. The Gen0, Gen1, and Gen2 collection counts indicate the number of times each generation was collected during the benchmarking process.
This means that the garbage collector had to run 33,142 times for Gen0, 1,571 times for Gen1, and 1,571 times for Gen2. \ No newline at end of file diff --git a/sections/Chapter-4-sections/Pixel_iteration_benchmark_results.tex b/sections/Chapter-4-sections/Pixel_iteration_benchmark_results.tex index f5ca0c68b9a63a8de55a76d3a5a04582b8b70ad2..3035150e962f8199652cd8b8b07b734c665f0333 100644 --- a/sections/Chapter-4-sections/Pixel_iteration_benchmark_results.tex +++ b/sections/Chapter-4-sections/Pixel_iteration_benchmark_results.tex @@ -1,11 +1,9 @@ \section{Pixel Iteration Benchmark Results} -On the other hand, the pixel iteration benchmark aimed to assess the libraries’ abilities to process each pixel of an image. For ImageSharp, the warm-up phase for pixel iteration took an average of 755 milliseconds, with the main iteration averaging 117.06 milliseconds per cycle and a cumulative total of 12,461 milliseconds over 100 iterations. The performance landscape changed when we observed the results for Magick.NET. This configuration recorded a warm-up time of approximately 12,149 milliseconds, and the main iterations averaged 2,054.18 milliseconds, resulting in an astronomical total of 217,567 milliseconds. +On the other hand, the pixel iteration benchmark aimed to assess the libraries’ abilities to process each pixel of an image. For ImageSharp, the warm-up phase for pixel iteration took an average of 755 milliseconds, with the main iteration averaging 117.06 milliseconds per cycle and a cumulative total of 12,461 milliseconds over 100 iterations. -As discussed earlier, OpenCvSharp and Emgu CV were the chosen libraries in their combinations with SkiaSharp and Structure.Sketching, respectively for the pixel iteration task. The results of these tests provide insights into the performance of these libraries in scenarios where pixel-level operations are required, such as image processing algorithms or computer vision applications. 
The performance landscape changed when we observed the results for OpenCvSharp. This configuration recorded a warm-up time of approximately 813 milliseconds, and the main iterations averaged 159.44 milliseconds, resulting in a total of 16,757 milliseconds. In contrast, Emgu CV delivered impressive results with a warm-up time of 1,118 milliseconds and an average main iteration time of 118.87 milliseconds, culminating in a total of 13,005 milliseconds. - - -\begin{longtable}{|>{\raggedright\arraybackslash}p{0.40\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} +\renewcommand{\arraystretch}{2} +\begin{longtable}{|>{\raggedright\arraybackslash}p{0.25\textwidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|>{\raggedright\arraybackslash}p{\columnWidth}|} \hline \rowcolor{purple!30} \textbf{Library} & \textbf{Warm-Up Time (ms)} & \textbf{Avg. Time Excl. Warm-Up (ms)} & \textbf{Total Time Incl. Warm-Up (ms)} \\ @@ -23,17 +21,20 @@ As discussed earlier, OpenCvSharp and Emgu CV were the chosen libraries in their \textbf{Magick.NET} & 12149 & 2054.18 & 217567 \\\hline \textbf{Emgu CV + Structure.Sketching} & 1118 & 118.87 & 13005 \\\hline -\caption{Pixel Iteration Benchmark Results} +\caption{Pixel Iteration Benchmark Results in milliseconds, showing the warm-up time, average time excluding warm-up, and total time including warm-up for each library or combination.} +\label{tab:pixel-iteration} \end{longtable} +\renewcommand{\arraystretch}{1.0} -The table above summarizes the pixel iteration benchmark results, highlighting the warm-up and average times for each library combination. The data clearly show that Emgu CV is the most efficient library for pixel iteration, with the lowest average time of 118.87 milliseconds. 
ImageSharp and OpenCvSharp follow closely behind, with average times of 117.06 and 159.44 milliseconds, respectively. In contrast, Magick.NET is significantly slower, with an average time of 2,054.18 milliseconds. +The performance landscape changed upon examining the results for Magick.NET. This configuration recorded a warm-up time of approximately 12,149 milliseconds, and the main iterations averaged 2,054.18 milliseconds, resulting in an astronomical total of 217,567 milliseconds. As discussed earlier, OpenCvSharp and Emgu CV were chosen in combinations with SkiaSharp and Structure.Sketching, respectively, for the pixel iteration task. The results of these tests provide insights into the performance of these libraries in scenarios where pixel-level operations are required, such as image processing algorithms or computer vision applications. The performance landscape also shifted upon examining the results for OpenCvSharp. This configuration recorded a warm-up time of approximately 813 milliseconds, and the main iterations averaged 159.44 milliseconds, resulting in a total of 16,757 milliseconds. In contrast, Emgu CV delivered impressive results with a warm-up time of 1,118 milliseconds and an average main iteration time of 118.87 milliseconds, culminating in a total of 13,005 milliseconds. -Graphical depictions further highlight these performance differences. +Table~\ref{tab:pixel-iteration} summarizes the pixel iteration benchmark results, highlighting the warm-up and average times for each library combination. The data clearly show that Emgu CV is the most efficient library for pixel iteration, with the lowest average time of 118.87 milliseconds. ImageSharp and OpenCvSharp follow closely behind, with average times of 117.06 and 159.44 milliseconds, respectively. In contrast, Magick.NET is significantly slower, with an average time of 2,054.18 milliseconds. Figure~\ref{fig:pixel-iteration} further illustrates these performance differences.
\includegraphics[width=5in]{media/log_2.png} -\captionof{figure}{Pixel Iteration Benchmark Results} +\captionof{figure}{Bar chart showing the Pixel Iteration Benchmark Results in milliseconds, with a logarithmic scale to highlight the differences in total times. X-axis represents the libraries or combinations, while Y-axis shows the time in milliseconds.} \label{fig:pixel-iteration} +\vspace{1em} -The disparity between these figures is telling. While Magick.NET excels in some aspects of image conversion, it appears less suited for tasks involving pixel-by-pixel iteration, given the significantly higher processing times. On the other hand, Emgu CV and ImageSharp produce comparable main iteration times; however, when considering the overall picture, the lower cumulative times of Emgu CV make it a more appealing choice for pixel-level operations. +The disparity shown in Figure~\ref{fig:pixel-iteration} is telling. While Magick.NET excels in some aspects of image conversion, it appears less suited for tasks involving pixel-by-pixel iteration, given the significantly higher processing times. On the other hand, Emgu CV and ImageSharp produce comparable main iteration times; however, when considering the overall picture, the lower cumulative times of Emgu CV make it a more appealing choice for pixel-level operations. The visual comparisons elucidate that while ImageSharp and Emgu CV+Structure.Sketching are closely matched in main iteration performance, the excessive warm-up and overall times associated with Magick.NET underscore its limitations for this specific task.
diff --git a/sections/Chapter-4-sections/Summary.tex b/sections/Chapter-4-sections/Summary.tex index 80e818b10c9507ceadd34526a5b227b5a6f94187..dacda5a30438ece033737062e3522544f437cbfc 100644 --- a/sections/Chapter-4-sections/Summary.tex +++ b/sections/Chapter-4-sections/Summary.tex @@ -1,3 +1,3 @@ \section{Summary} -The benchmarking results provide a comprehensive overview of the performance and efficiency of the image processing libraries tested. The data clearly show that Emgu CV + Structure.Sketching is the most efficient combination for image conversion, with the lowest average time of 490 milliseconds. In contrast, ImageSharp and Magick.NET are significantly slower, with average times of 2,754 and 4,333 milliseconds, respectively. For pixel iteration, Emgu CV+Structure.Sketching is again the most efficient, with the lowest average time of 118.87 milliseconds. ImageSharp and OpenCvSharp+SkiaSharp follow closely behind, with average times of 117.06 and 159.44 milliseconds, respectively. In contrast, Magick.NET is significantly slower, with an average time of 2,054.18 milliseconds. The memory benchmarking results further highlight the efficiency of ImageSharp and SkiaSharp in terms of memory consumption, with Emgu CV exhibiting higher memory usage. These findings provide valuable insights into the performance characteristics of each library and will inform our final recommendations for image processing tasks. \ No newline at end of file +The benchmarking results provide a comprehensive overview of the performance and efficiency of the image processing libraries tested. The data clearly show that Emgu CV + Structure.Sketching is the most efficient combination for image conversion, with the lowest average time of 490 milliseconds. In contrast, ImageSharp and Magick.NET are significantly slower, with average times of 2,754 and 4,333 milliseconds, respectively. 
For pixel iteration, Emgu CV+Structure.Sketching is again the most efficient, with the lowest average time of 118.87 milliseconds. ImageSharp and OpenCvSharp+SkiaSharp follow closely behind, with average times of 117.06 and 159.44 milliseconds, respectively. In contrast, Magick.NET is significantly slower, with an average time of 2,054.18 milliseconds. The memory benchmarking results further highlight the efficiency of ImageSharp and SkiaSharp in terms of memory consumption, with Emgu CV exhibiting higher memory usage. Developers can use these findings to select the most suitable library for their particular needs based on their specific requirements and constraints regarding speed and resource utilization. \ No newline at end of file diff --git a/sources/bibs/1.1.1.bib b/sources/bibs/1.1.1.bib new file mode 100644 index 0000000000000000000000000000000000000000..7a3baf6be507aa3d3e76f31287feaf43837d21c4 --- /dev/null +++ b/sources/bibs/1.1.1.bib @@ -0,0 +1,72 @@ + +@article{cooley_algorithm_nodate, + title = {An {Algorithm} for the {Machine} {Calculation} of {Complex} {Fourier} {Series}}, + language = {en}, + author = {Cooley, James W and Tukey, John W}, + file = {PDF:C\:\\Users\\SFI19\\Zotero\\storage\\MCLKRX9H\\Cooley and Tukey - An Algorithm for the Machine Calculation of Complex Fourier Series.pdf:application/pdf}, +} + +@article{cooley_algorithm_1965, + title = {An {Algorithm} for the {Machine} {Calculation} of {Complex} {Fourier} {Series}}, + volume = {19}, + issn = {0025-5718}, + url = {https://www.jstor.org/stable/2003354}, + doi = {10.2307/2003354}, + number = {90}, + urldate = {2025-03-23}, + journal = {Mathematics of Computation}, + author = {Cooley, James W. 
and Tukey, John W.}, + year = {1965}, + note = {Publisher: American Mathematical Society}, + pages = {297--301}, + file = {Full Text:C\:\\Users\\SFI19\\Zotero\\storage\\DJ3PD27D\\Cooley and Tukey - 1965 - An Algorithm for the Machine Calculation of Complex Fourier Series.pdf:application/pdf}, +} + +@article{hounsfield_computerized_1973, + title = {Computerized transverse axial scanning (tomography): {Part} 1. {Description} of system}, + volume = {46}, + issn = {0007-1285}, + shorttitle = {Computerized transverse axial scanning (tomography)}, + url = {https://doi.org/10.1259/0007-1285-46-552-1016}, + doi = {10.1259/0007-1285-46-552-1016}, + abstract = {This article describes a technique in which X-ray transmission readings are taken through the head at a multitude of angles: from these data, absorption values of the material contained within the head are calculated on a computer and presented as a series of pictures of slices of the cranium. The system is approximately 100 times more sensitive than conventional X-ray systems to such an extent that variations in soft tissues of nearly similar density can be displayed.}, + number = {552}, + urldate = {2025-03-23}, + journal = {British Journal of Radiology}, + author = {Hounsfield, G. N.}, + month = dec, + year = {1973}, + pages = {1016--1022}, + file = {Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\Q676YM6Q\\7306149.html:text/html}, +} + +@article{lecun_deep_2015, + title = {Deep learning}, + volume = {521}, + issn = {1476-4687}, + doi = {10.1038/nature14539}, + abstract = {Deep learning allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. These methods have dramatically improved the state-of-the-art in speech recognition, visual object recognition, object detection and many other domains such as drug discovery and genomics. 
Deep learning discovers intricate structure in large data sets by using the backpropagation algorithm to indicate how a machine should change its internal parameters that are used to compute the representation in each layer from the representation in the previous layer. Deep convolutional nets have brought about breakthroughs in processing images, video, speech and audio, whereas recurrent nets have shone light on sequential data such as text and speech. (PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + number = {7553}, + journal = {Nature}, + author = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey}, + year = {2015}, + note = {Place: United Kingdom +Publisher: Nature Publishing Group}, + keywords = {Algorithms, Computational Modeling, Machine Learning, Object Recognition}, + pages = {436--444}, +} + +@misc{hinton_improving_2012, + title = {Improving neural networks by preventing co-adaptation of feature detectors}, + url = {http://arxiv.org/abs/1207.0580}, + doi = {10.48550/arXiv.1207.0580}, + abstract = {When a large feedforward neural network is trained on a small training set, it typically performs poorly on held-out test data. This "overfitting" is greatly reduced by randomly omitting half of the feature detectors on each training case. This prevents complex co-adaptations in which a feature detector is only helpful in the context of several other specific feature detectors. Instead, each neuron learns to detect a feature that is generally helpful for producing the correct answer given the combinatorially large variety of internal contexts in which it must operate. Random "dropout" gives big improvements on many benchmark tasks and sets new records for speech and object recognition.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Hinton, Geoffrey E. 
and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R.}, + month = jul, + year = {2012}, + note = {arXiv:1207.0580 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\25BYMHFC\\Hinton et al. - 2012 - Improving neural networks by preventing co-adaptation of feature detectors.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\HIDVZ7NV\\1207.html:text/html}, +} diff --git a/sources/bibs/1.1.2.bib b/sources/bibs/1.1.2.bib new file mode 100644 index 0000000000000000000000000000000000000000..4094ab4e04c7ea3eb802212e20ca62e87c60dcda --- /dev/null +++ b/sources/bibs/1.1.2.bib @@ -0,0 +1,83 @@ + +@article{zhang_efficient_2023, + title = {An efficient lightweight convolutional neural network for industrial surface defect detection}, + volume = {56}, + issn = {1573-7462}, + url = {https://doi.org/10.1007/s10462-023-10438-y}, + doi = {10.1007/s10462-023-10438-y}, + abstract = {Since surface defect detection is significant to ensure the utility, integrality, and security of productions, and it has become a key issue to control the quality of industrial products, which arouses interests of researchers. However, deploying deep convolutional neural networks (DCNNs) on embedded devices is very difficult due to limited storage space and computational resources. In this paper, an efficient lightweight convolutional neural network (CNN) model is designed for surface defect detection of industrial productions in the perspective of image processing via deep learning. By combining the inverse residual architecture with coordinate attention (CA) mechanism, a coordinate attention mobile (CAM) backbone network is constructed for feature extraction. 
Then, in order to solve the small object detection problem, the multi-scale strategy is developed by introducing the CA into the cross-layer information flow to improve the quality of feature extraction and augment the representation ability on multi-scale features. Hereafter, the multi-scale feature is integrated to design a novel bidirectional weighted feature pyramid network (BWFPN) to improve the model detection accuracy without increasing much computational burden. From the comparative experimental results on open source datasets, the effectiveness of the developed lightweight CNN is evaluated, and the detection accuracy attains on par with the state-of-the-art (SOTA) model with less parameters and calculation.}, + language = {en}, + number = {9}, + urldate = {2025-03-23}, + journal = {Artificial Intelligence Review}, + author = {Zhang, Dehua and Hao, Xinyuan and Wang, Dechen and Qin, Chunbin and Zhao, Bo and Liang, Linlin and Liu, Wei}, + month = sep, + year = {2023}, + keywords = {Artificial Intelligence, Attention mechanism, Feature pyramid networks, Lightweight convolutional neural networks, Surface defect detection}, + pages = {10651--10677}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\EJQ8PAKB\\Zhang et al. - 2023 - An efficient lightweight convolutional neural network for industrial surface defect detection.pdf:application/pdf}, +} + +@article{litjens_survey_2017, + title = {A {Survey} on {Deep} {Learning} in {Medical} {Image} {Analysis}}, + volume = {42}, + issn = {13618415}, + url = {http://arxiv.org/abs/1702.05747}, + doi = {10.1016/j.media.2017.07.005}, + abstract = {Deep learning algorithms, in particular convolutional networks, have rapidly become a methodology of choice for analyzing medical images. This paper reviews the major deep learning concepts pertinent to medical image analysis and summarizes over 300 contributions to the field, most of which appeared in the last year. 
We survey the use of deep learning for image classification, object detection, segmentation, registration, and other tasks and provide concise overviews of studies per application area. Open challenges and directions for future research are discussed.}, + urldate = {2025-03-23}, + journal = {Medical Image Analysis}, + author = {Litjens, Geert and Kooi, Thijs and Bejnordi, Babak Ehteshami and Setio, Arnaud Arindra Adiyoso and Ciompi, Francesco and Ghafoorian, Mohsen and Laak, Jeroen A. W. M. van der and Ginneken, Bram van and Sánchez, Clara I.}, + month = dec, + year = {2017}, + note = {arXiv:1702.05747 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + pages = {60--88}, + annote = {Comment: Revised survey includes expanded discussion section and reworked introductory section on common deep architectures. Added missed papers from before Feb 1st 2017}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\DT6DHLHY\\Litjens et al. - 2017 - A Survey on Deep Learning in Medical Image Analysis.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\DSJ5RKP6\\1702.html:text/html}, +} + +@article{maimaitijiang_soybean_2020, + title = {Soybean yield prediction from {UAV} using multimodal data fusion and deep learning}, + url = {https://www.academia.edu/84238554/Soybean_yield_prediction_from_UAV_using_multimodal_data_fusion_and_deep_learning}, + abstract = {Preharvest crop yield prediction is critical for grain policy making and food security. Early estimation of yield at field or plot scale also contributes to high-throughput plant phenotyping and precision agriculture. 
New developments in Unmanned}, + urldate = {2025-03-23}, + journal = {Remote Sensing of Environment}, + author = {Maimaitijiang, Maitiniyazi}, + month = jan, + year = {2020}, + file = {PDF:C\:\\Users\\SFI19\\Zotero\\storage\\PB6J69JW\\Maimaitijiang - 2020 - Soybean yield prediction from UAV using multimodal data fusion and deep learning.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\SYJLAK92\\Soybean_yield_prediction_from_UAV_using_multimodal_data_fusion_and_deep_learning.html:text/html}, +} + +@misc{janai_computer_2021, + title = {Computer {Vision} for {Autonomous} {Vehicles}: {Problems}, {Datasets} and {State} of the {Art}}, + shorttitle = {Computer {Vision} for {Autonomous} {Vehicles}}, + url = {http://arxiv.org/abs/1704.05519}, + doi = {10.48550/arXiv.1704.05519}, + abstract = {Recent years have witnessed enormous progress in AI-related fields such as computer vision, machine learning, and autonomous vehicles. As with any rapidly growing field, it becomes increasingly difficult to stay up-to-date or enter the field as a beginner. While several survey papers on particular sub-problems have appeared, no comprehensive survey on problems, datasets, and methods in computer vision for autonomous vehicles has been published. This book attempts to narrow this gap by providing a survey on the state-of-the-art datasets and techniques. Our survey includes both the historically most relevant literature as well as the current state of the art on several specific topics, including recognition, reconstruction, motion estimation, tracking, scene understanding, and end-to-end learning for autonomous driving. Towards this goal, we analyze the performance of the state of the art on several challenging benchmarking datasets, including KITTI, MOT, and Cityscapes. Besides, we discuss open problems and current research challenges. 
To ease accessibility and accommodate missing references, we also provide a website that allows navigating topics as well as methods and provides additional information.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Janai, Joel and Güney, Fatma and Behl, Aseem and Geiger, Andreas}, + month = mar, + year = {2021}, + note = {arXiv:1704.05519 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Robotics}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\J5BRT4MJ\\Janai et al. - 2021 - Computer Vision for Autonomous Vehicles Problems, Datasets and State of the Art.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\PTNC6R8L\\1704.html:text/html}, +} + +@misc{ren_faster_2016, + title = {Faster {R}-{CNN}: {Towards} {Real}-{Time} {Object} {Detection} with {Region} {Proposal} {Networks}}, + shorttitle = {Faster {R}-{CNN}}, + url = {http://arxiv.org/abs/1506.01497}, + doi = {10.48550/arXiv.1506.01497}, + abstract = {State-of-the-art object detection networks depend on region proposal algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN have reduced the running time of these detection networks, exposing region proposal computation as a bottleneck. In this work, we introduce a Region Proposal Network (RPN) that shares full-image convolutional features with the detection network, thus enabling nearly cost-free region proposals. An RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by Fast R-CNN for detection. We further merge RPN and Fast R-CNN into a single network by sharing their convolutional features---using the recently popular terminology of neural networks with 'attention' mechanisms, the RPN component tells the unified network where to look. 
For the very deep VGG-16 model, our detection system has a frame rate of 5fps (including all steps) on a GPU, while achieving state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015 competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning entries in several tracks. Code has been made publicly available.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + month = jan, + year = {2016}, + note = {arXiv:1506.01497 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: Extended tech report}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\PKY5AU96\\Ren et al. - 2016 - Faster R-CNN Towards Real-Time Object Detection with Region Proposal Networks.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\X8PBIK44\\1506.html:text/html}, +} diff --git a/sources/bibs/1.1.3.bib b/sources/bibs/1.1.3.bib new file mode 100644 index 0000000000000000000000000000000000000000..d59468bc85e8300c40010ccc572480803be2dd5d --- /dev/null +++ b/sources/bibs/1.1.3.bib @@ -0,0 +1,58 @@ + +@misc{bradski_opencv_nodate, + title = {The {OpenCV} {Library}}, + url = {http://www.drdobbs.com/open-source/the-opencv-library/184404319}, + abstract = {OpenCV is an open-source, computer-vision library for extracting and processing meaningful data from images.}, + urldate = {2025-03-23}, + journal = {Dr. 
Dobb's},
+	author = {Bradski, Gary},
+	file = {Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\A9RVSN8V\\184404319.html:text/html},
+}
+
+@misc{abadi_tensorflow_2016,
+	title = {{TensorFlow}: {A} system for large-scale machine learning},
+	shorttitle = {{TensorFlow}},
+	url = {http://arxiv.org/abs/1605.08695},
+	doi = {10.48550/arXiv.1605.08695},
+	abstract = {TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. TensorFlow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs, and custom designed ASICs known as Tensor Processing Units (TPUs). This architecture gives flexibility to the application developer: whereas in previous "parameter server" designs the management of shared state is built into the system, TensorFlow enables developers to experiment with novel optimizations and training algorithms. TensorFlow supports a variety of applications, with particularly strong support for training and inference on deep neural networks. Several Google services use TensorFlow in production, we have released it as an open-source project, and it has become widely used for machine learning research. In this paper, we describe the TensorFlow dataflow model in contrast to existing systems, and demonstrate the compelling performance that TensorFlow achieves for several real-world applications.},
+	urldate = {2025-03-23},
+	publisher = {arXiv},
+	author = {Abadi, Martín and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and Kudlur, Manjunath and Levenberg, Josh and Monga, Rajat and Moore, Sherry and Murray, Derek G. 
and Steiner, Benoit and Tucker, Paul and Vasudevan, Vijay and Warden, Pete and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang}, + month = may, + year = {2016}, + note = {arXiv:1605.08695 [cs]}, + keywords = {Computer Science - Artificial Intelligence, Computer Science - Distributed, Parallel, and Cluster Computing}, + annote = {Comment: 18 pages, 9 figures; v2 has a spelling correction in the metadata}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\ND7JHGWD\\Abadi et al. - 2016 - TensorFlow A system for large-scale machine learning.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\7IRZAXVR\\1605.html:text/html}, +} + +@book{russell_artificial_2016, + address = {Boston}, + edition = {Third edition, Global edition}, + title = {Artificial intelligence a modern approach}, + isbn = {978-1-292-15396-4}, + url = {http://www.gbv.de/dms/tib-ub-hannover/848811429.pdf}, + abstract = {Hier auch später erschienene, unveränderte Nachdrucke}, + urldate = {2025-03-23}, + publisher = {Pearson}, + author = {Russell, Stuart J. and Norvig, Peter and Davis, Ernest and Edwards, Douglas}, + year = {2016}, + keywords = {Artificial intelligence, Künstliche Intelligenz, Precht, Richard David}, + file = {Artificial Intelligence-A Modern Approach (3rd Edition) ( PDFDrive ).pdf:C\:\\Users\\SFI19\\Zotero\\storage\\MX8PZ6JQ\\Artificial Intelligence-A Modern Approach (3rd Edition) ( PDFDrive ).pdf:application/pdf}, +} + +@book{ragan-kelley_halide_2013, + title = {Halide: {A} {Language} and {Compiler} for {Optimizing} {Parallelism}, {Locality}, and {Recomputation} in {Image} {Processing} {Pipelines}}, + volume = {48}, + shorttitle = {Halide}, + abstract = {Image processing pipelines combine the challenges of stencil computations and stream programs. They are composed of large graphs of different stencil stages, as well as complex reductions, and stages with global or data-dependent access patterns. 
Because of their complex structure, the performance difference between a naive implementation of a pipeline and an optimized one is often an order of magnitude. Efficient implementations require optimization of both parallelism and locality, but due to the nature of stencils, there is a fundamental tension between parallelism, locality, and introducing redundant recomputation of shared values. +We present a systematic model of the tradeoff space fundamental to stencil pipelines, a schedule representation which describes concrete points in this space for each stage in an image processing pipeline, and an optimizing compiler for the Halide image processing language that synthesizes high performance implementations from a Halide algorithm and a schedule. Combining this compiler with stochastic search over the space of schedules enables terse, composable programs to achieve state-of-the-art performance on a wide range of real image processing pipelines, and across different hardware architectures, including multicores with SIMD, and heterogeneous CPU+GPU execution. From simple Halide programs written in a few hours, we demonstrate performance up to 5x faster than hand-tuned C, intrinsics, and CUDA implementations optimized by experts over weeks or months, for image processing applications beyond the reach of past automatic compilers.}, + author = {Ragan-Kelley, Jonathan and Barnes, Connelly and Adams, Andrew and Paris, Sylvain and Durand, Frédo and Amarasinghe, Saman}, + month = jun, + year = {2013}, + doi = {10.1145/2499370.2462176}, + note = {Journal Abbreviation: ACM SIGPLAN Notices +Pages: 530 +Publication Title: ACM SIGPLAN Notices}, + file = {Full Text:C\:\\Users\\SFI19\\Zotero\\storage\\62D2CBIL\\Ragan-Kelley et al. 
- 2013 - Halide A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Pro.pdf:application/pdf},
+}
diff --git a/sources/bibs/1.1.bib b/sources/bibs/1.1.bib
new file mode 100644
index 0000000000000000000000000000000000000000..2d62140d69269c18453ad8956e4f5d8aac3e374b
--- /dev/null
+++ b/sources/bibs/1.1.bib
@@ -0,0 +1,142 @@
+
+@book{goodfellow_deep_2016,
+	title = {Deep {Learning}},
+	isbn = {978-0-262-03561-3},
+	abstract = {An introduction to a broad range of topics in deep learning, covering mathematical and conceptual background, deep learning techniques used in industry, and research perspectives. “Written by three experts in the field, Deep Learning is the only comprehensive book on the subject.”—Elon Musk, cochair of OpenAI; cofounder and CEO of Tesla and SpaceX. Deep learning is a form of machine learning that enables computers to learn from experience and understand the world in terms of a hierarchy of concepts. Because the computer gathers knowledge from experience, there is no need for a human computer operator to formally specify all the knowledge that the computer needs. The hierarchy of concepts allows the computer to learn complicated concepts by building them out of simpler ones; a graph of these hierarchies would be many layers deep. This book introduces a broad range of topics in deep learning. The text offers mathematical and conceptual background, covering relevant concepts in linear algebra, probability theory and information theory, numerical computation, and machine learning. It describes deep learning techniques used by practitioners in industry, including deep feedforward networks, regularization, optimization algorithms, convolutional networks, sequence modeling, and practical methodology; and it surveys such applications as natural language processing, speech recognition, computer vision, online recommendation systems, bioinformatics, and videogames. 
Finally, the book offers research perspectives, covering such theoretical topics as linear factor models, autoencoders, representation learning, structured probabilistic models, Monte Carlo methods, the partition function, approximate inference, and deep generative models. Deep Learning can be used by undergraduate or graduate students planning careers in either industry or research, and by software engineers who want to begin using deep learning in their products or platforms. A website offers supplementary material for both readers and instructors.}, + language = {en}, + publisher = {MIT Press}, + author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron}, + month = nov, + year = {2016}, + note = {Google-Books-ID: Np9SDQAAQBAJ}, + keywords = {Computers / Artificial Intelligence / General, Computers / Computer Science, Computers / Data Science / Machine Learning}, +} + +@book{gonzalez_digital_2008, + title = {Digital image processing}, + isbn = {978-0-13-168728-8 978-0-13-505267-9}, + url = {http://archive.org/details/digitalimageproc0003gonz}, + abstract = {xxii, 954 pages : 25 cm; Completely self-contained-and heavily illustrated-this introduction to basic concepts and methodologies for digital image processing is written at a level that truly is suitable for seniors and first-year graduate students in almost any technical discipline. The leading textbook in its field for more than twenty years, it continues its cutting-edge focus on contemporary developments in all mainstream areas of image processing-e.g., image fundamentals, image enhancement in the spatial and frequency domains, restoration, color image processing, wavelets, image compression, morphology, segmentation, image description, and the fundamentals of object recognition. 
It focuses on material that is fundamental and has a broad scope of application; Includes bibliographical references (pages 915-942) and index; Introduction -- Digital image fundamentals -- Intensity transformations and spatial filtering -- Filtering the frequency domain -- Image restoration and reconstruction -- Color image processing -- Wavelets and multiresolution processing -- Image compression -- Morphological image processing -- Image segmentation -- Representation and description -- Object recognition},
+	language = {eng},
+	urldate = {2025-02-09},
+	publisher = {Upper Saddle River, N.J. : Prentice Hall},
+	author = {Gonzalez, Rafael C.},
+	collaborator = {{Internet Archive}},
+	year = {2008},
+	keywords = {Image processing -- Digital techniques},
+}
+
+@book{gonzalez_digital_2008-1,
+	title = {Digital {Image} {Processing}},
+	isbn = {978-0-13-168728-8},
+	abstract = {For courses in Image Processing and Computer Vision. Completely self-contained--and heavily illustrated--this introduction to basic concepts and methodologies for digital image processing is written at a level that truly is suitable for seniors and first-year graduate students in almost any technical discipline. The leading textbook in its field for more than twenty years, it continues its cutting-edge focus on contemporary developments in all mainstream areas of image processing--e.g., image fundamentals, image enhancement in the spatial and frequency domains, restoration, color image processing, wavelets, image compression, morphology, segmentation, image description, and the fundamentals of object recognition. It focuses on material that is fundamental and has a broad scope of application.},
+	language = {en},
+	publisher = {Prentice Hall},
+	author = {Gonzalez, Rafael C. 
and Woods, Richard Eugene}, + year = {2008}, + note = {Google-Books-ID: 8uGOnjRGEzoC}, + keywords = {Computers / Image Processing, Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Technology \& Engineering / Signals \& Signal Processing}, +} + +@book{jain_fundamentals_1989, + title = {Fundamentals of {Digital} {Image} {Processing}}, + isbn = {978-0-13-336165-0}, + abstract = {Presents a thorough overview of the major topics of digital image processing, beginning with the basic mathematical tools needed for the subject. Includes a comprehensive chapter on stochastic models for digital image processing. Covers aspects of image representation including luminance, color, spatial and temporal properties of vision, and digitization. Explores various image processing techniques. Discusses algorithm development (software/firmware) for image transforms, enhancement, reconstruction, and image coding.}, + language = {en}, + publisher = {Prentice Hall}, + author = {Jain, Anil K.}, + year = {1989}, + note = {Google-Books-ID: GANSAAAAMAAJ}, + keywords = {Computers / Image Processing, Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Technology \& Engineering / Signals \& Signal Processing, Science / Physics / Optics \& Light, Technology \& Engineering / Electrical, Technology \& Engineering / Telecommunications}, +} + +@book{jain_fundamentals_1989-1, + title = {Fundamentals of digital image processing}, + isbn = {978-0-13-336165-0}, + url = {http://archive.org/details/fundamentalsofdi0000jain}, + abstract = {xxi, 569 p. 
: 24 cm; Includes bibliographical references and index}, + language = {eng}, + urldate = {2025-02-09}, + publisher = {Englewood Cliffs, NJ : Prentice Hall}, + author = {Jain, Anil K.}, + collaborator = {{Internet Archive}}, + year = {1989}, + keywords = {Image processing -- Digital techniques}, +} + +@book{russ_image_2016, + title = {The {Image} {Processing} {Handbook}}, + isbn = {978-1-4398-4063-4}, + abstract = {Whether obtained by microscopes, space probes, or the human eye, the same basic tools can be applied to acquire, process, and analyze the data contained in images. Ideal for self study, The Image Processing Handbook, Sixth Edition, first published in 1992, raises the bar once again as the gold-standard reference on this subject. Using extensive new illustrations and diagrams, it offers a logically organized exploration of the important relationship between 2D images and the 3D structures they reveal. Provides Hundreds of Visual Examples in FULL COLOR! The author focuses on helping readers visualize and compare processing and measurement operations and how they are typically combined in fields ranging from microscopy and astronomy to real-world scientific, industrial, and forensic applications. Presenting methods in the order in which they would be applied in a typical workflow—from acquisition to interpretation—this book compares a wide range of algorithms used to: Improve the appearance, printing, and transmission of an image Prepare images for measurement of the features and structures they reveal Isolate objects and structures, and measure their size, shape, color, and position Correct defects and deal with limitations in images Enhance visual content and interpretation of details This handbook avoids dense mathematics, instead using new practical examples that better convey essential principles of image processing. 
This approach is more useful to develop readers’ grasp of how and why to apply processing techniques and ultimately process the mathematical foundations behind them. Much more than just an arbitrary collection of algorithms, this is the rare book that goes beyond mere image improvement, presenting a wide range of powerful example images that illustrate techniques involved in color processing and enhancement. Applying his 50-year experience as a scientist, educator, and industrial consultant, John Russ offers the benefit of his image processing expertise for fields ranging from astronomy and biomedical research to food science and forensics. His valuable insights and guidance continue to make this handbook a must-have reference.}, + language = {en}, + publisher = {CRC Press}, + author = {Russ, John C.}, + month = apr, + year = {2016}, + note = {Google-Books-ID: gxXXRJWfEsoC}, + keywords = {Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Computers / General, Medical / Biotechnology, Technology \& Engineering / Biomedical}, +} + +@book{bradski_learning_2008, + title = {Learning {OpenCV} : computer vision with the {OpenCV} library}, + isbn = {978-0-596-51613-0}, + shorttitle = {Learning {OpenCV}}, + url = {http://archive.org/details/learningopencvco0000brad}, + abstract = {xvii, 555 pages : 24 cm; Learning OpenCV puts you in the middle of the rapidly expanding field of computer vision. Written by the creators of the free open source OpenCV library, this book introduces you to computer vision and demonstrates how you can quickly build applications that enable computers to "see" and make decisions based on that data. Computer vision is everywhere-in security systems, manufacturing inspection systems, medical image analysis, Unmanned Aerial Vehicles, and more. It stitches Google maps and Google Earth together, checks the pixels on LCD screens, and makes sure the stitches in your shirt are sewn properly. 
OpenCV provides an easy-to-use computer vision framework and a comprehensive library with more than 500 functions that can run vision code in real time; Includes bibliographical references (pages 527-541) and index; Overview -- Introduction to OpenCV -- Getting to know OpenCV -- HighGUI -- Image processing -- Image transforms -- Histograms and matching -- Contours -- Image parts and segmentation -- Tracking and motion -- Camera models and calibration -- Projection and 3D vision -- Machine learning -- OpenCV's future}, + language = {eng}, + urldate = {2025-03-23}, + publisher = {Sebastopol, CA : O'Reilly}, + author = {Bradski, Gary R.}, + collaborator = {{Internet Archive}}, + year = {2008}, + keywords = {OpenCV}, +} + +@incollection{szeliski_image_2022, + address = {Cham}, + title = {Image {Processing}}, + isbn = {978-3-030-34372-9}, + url = {https://doi.org/10.1007/978-3-030-34372-9_3}, + abstract = {Now that we have seen how images are formed through the interaction of 3D scene elements, lighting, and camera optics and sensors, let us look at the first stage in most computer vision algorithms, namely the use of image processing to preprocess the image and convert it into a form suitable for further analysis. 
Examples of such operations include exposure correction and color balancing, reducing image noise, increasing sharpness, or straightening the image by rotating it.}, + language = {en}, + urldate = {2025-03-23}, + booktitle = {Computer {Vision}: {Algorithms} and {Applications}}, + publisher = {Springer International Publishing}, + author = {Szeliski, Richard}, + editor = {Szeliski, Richard}, + year = {2022}, + doi = {10.1007/978-3-030-34372-9_3}, + pages = {85--151}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\4E9CTSBC\\Szeliski - 2022 - Image Processing.pdf:application/pdf}, +} + +@incollection{szeliski_introduction_2022, + address = {Cham}, + title = {Introduction}, + isbn = {978-3-030-34372-9}, + url = {https://doi.org/10.1007/978-3-030-34372-9_1}, + abstract = {As humans, we perceive the three-dimensional structure of the world around us with apparent ease. Think of how vivid the three-dimensional percept is when you look at a vase of flowers sitting on the table next to you.}, + language = {en}, + urldate = {2025-03-23}, + booktitle = {Computer {Vision}: {Algorithms} and {Applications}}, + publisher = {Springer International Publishing}, + author = {Szeliski, Richard}, + editor = {Szeliski, Richard}, + year = {2022}, + doi = {10.1007/978-3-030-34372-9_1}, + pages = {1--26}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\AWW7TKMC\\Szeliski - 2022 - Introduction.pdf:application/pdf}, +} + +@misc{noauthor_computer_2010, + title = {Computer {Vision}: {Algorithms} and {Applications}}, + shorttitle = {Computer {Vision}}, + url = {https://scispace.com/papers/computer-vision-algorithms-and-applications-25dn6wu83j}, + abstract = {Humans perceive the three-dimensional structure of the world with apparent ease. However, despite all of the recent advances in computer vision research, the dream of having a computer interpret an image at the same level as a two-year old remains elusive. 
Why is computer vision such a challenging problem and what is the current state of the art? Computer Vision: Algorithms and Applications explores the variety of techniques commonly used to analyze and interpret images. It also describes challenging real-world applications where vision is being successfully used, both for specialized applications such as medical imaging, and for fun, consumer-level tasks such as image editing and stitching, which students can apply to their own personal photos and videos. More than just a source of recipes, this exceptionally authoritative and comprehensive textbook/reference also takes a scientific approach to basic vision problems, formulating physical models of the imaging process before inverting them to produce descriptions of a scene. These problems are also analyzed using statistical models and solved using rigorous engineering techniques Topics and features: structured to support active curricula and project-oriented courses, with tips in the Introduction for using the book in a variety of customized courses; presents exercises at the end of each chapter with a heavy emphasis on testing algorithms and containing numerous suggestions for small mid-term projects; provides additional material and more detailed mathematical topics in the Appendices, which cover linear algebra, numerical techniques, and Bayesian estimation theory; suggests additional reading at the end of each chapter, including the latest research in each sub-field, in addition to a full Bibliography at the end of the book; supplies supplementary course material for students at the associated website, http://szeliski.org/Book/. Suitable for an upper-level undergraduate or graduate-level course in computer science or engineering, this textbook focuses on basic techniques that work under real-world conditions and encourages students to push their creative boundaries. 
Its design and exposition also make it eminently suitable as a unique reference to the fundamental techniques and current research literature in computer vision.},
+	language = {en},
+	urldate = {2025-03-23},
+	journal = {SciSpace - Paper},
+	month = sep,
+	year = {2010},
+	file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\BG7IZ622\\2010 - Computer Vision Algorithms and Applications.pdf:application/pdf},
+}
diff --git a/sources/references.bib b/sources/references.bib
index 045f0d8beaade2eb17855fafb5edda8cc6565ecd..5c6386d8944aaf080c9eeb0ab58a5f79d329807d 100644
--- a/sources/references.bib
+++ b/sources/references.bib
@@ -297,3 +297,362 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	pages = {337--342},
 	file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\25G2NS2A\\Rao - 2023 - A Comparative Analysis of Deep Learning Frameworks and Libraries.pdf:application/pdf},
 }
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+@book{goodfellow_deep_2016,
+	title = {Deep {Learning}},
+	isbn = {978-0-262-03561-3},
+	abstract = {An introduction to a broad range of topics in deep learning, covering mathematical and conceptual background, deep learning techniques used in industry, and research perspectives. “Written by three experts in the field, Deep Learning is the only comprehensive book on the subject.”—Elon Musk, cochair of OpenAI; cofounder and CEO of Tesla and SpaceX. Deep learning is a form of machine learning that enables computers to learn from experience and understand the world in terms of a hierarchy of concepts. Because the computer gathers knowledge from experience, there is no need for a human computer operator to formally specify all the knowledge that the computer needs. The hierarchy of concepts allows the computer to learn complicated concepts by building them out of simpler ones; a graph of these hierarchies would be many layers deep. This book introduces a broad range of topics in deep learning. 
The text offers mathematical and conceptual background, covering relevant concepts in linear algebra, probability theory and information theory, numerical computation, and machine learning. It describes deep learning techniques used by practitioners in industry, including deep feedforward networks, regularization, optimization algorithms, convolutional networks, sequence modeling, and practical methodology; and it surveys such applications as natural language processing, speech recognition, computer vision, online recommendation systems, bioinformatics, and videogames. Finally, the book offers research perspectives, covering such theoretical topics as linear factor models, autoencoders, representation learning, structured probabilistic models, Monte Carlo methods, the partition function, approximate inference, and deep generative models. Deep Learning can be used by undergraduate or graduate students planning careers in either industry or research, and by software engineers who want to begin using deep learning in their products or platforms. A website offers supplementary material for both readers and instructors.}, + language = {en}, + publisher = {MIT Press}, + author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron}, + month = nov, + year = {2016}, + note = {Google-Books-ID: Np9SDQAAQBAJ}, + keywords = {Computers / Artificial Intelligence / General, Computers / Computer Science, Computers / Data Science / Machine Learning}, +} + +@book{gonzalez_digital_2008, + title = {Digital image processing}, + isbn = {978-0-13-168728-8 978-0-13-505267-9}, + url = {http://archive.org/details/digitalimageproc0003gonz}, + abstract = {xxii, 954 pages : 25 cm; Completely self-contained-and heavily illustrated-this introduction to basic concepts and methodologies for digital image processing is written at a level that truly is suitable for seniors and first-year graduate students in almost any technical discipline. 
The leading textbook in its field for more than twenty years, it continues its cutting-edge focus on contemporary developments in all mainstream areas of image processing-e.g., image fundamentals, image enhancement in the spatial and frequency domains, restoration, color image processing, wavelets, image compression, morphology, segmentation, image description, and the fundamentals of object recognition. It focuses on material that is fundamental and has a broad scope of application; Includes bibliographical references (pages 915-942) and index; Introduction -- Digital image fundamentals -- Intensity transformations and spatial filtering -- Filtering the frequency domain -- Image restoration and reconstruction -- Color image processing -- Wavelets and multiresolution processing -- Image compression -- Morphological image processing -- Image segmentation -- Representation and description -- Object recognition},
+	language = {eng},
+	urldate = {2025-02-09},
+	publisher = {Upper Saddle River, N.J. : Prentice Hall},
+	author = {Gonzalez, Rafael C.},
+	collaborator = {{Internet Archive}},
+	year = {2008},
+	keywords = {Image processing -- Digital techniques},
+}
+
+@book{gonzalez_digital_2008-1,
+	title = {Digital {Image} {Processing}},
+	isbn = {978-0-13-168728-8},
+	abstract = {For courses in Image Processing and Computer Vision. Completely self-contained--and heavily illustrated--this introduction to basic concepts and methodologies for digital image processing is written at a level that truly is suitable for seniors and first-year graduate students in almost any technical discipline. 
The leading textbook in its field for more than twenty years, it continues its cutting-edge focus on contemporary developments in all mainstream areas of image processing--e.g., image fundamentals, image enhancement in the spatial and frequency domains, restoration, color image processing, wavelets, image compression, morphology, segmentation, image description, and the fundamentals of object recognition. It focuses on material that is fundamental and has a broad scope of application.}, + language = {en}, + publisher = {Prentice Hall}, + author = {Gonzalez, Rafael C. and Woods, Richard Eugene}, + year = {2008}, + note = {Google-Books-ID: 8uGOnjRGEzoC}, + keywords = {Computers / Image Processing, Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Technology \& Engineering / Signals \& Signal Processing}, +} + +@book{jain_fundamentals_1989, + title = {Fundamentals of {Digital} {Image} {Processing}}, + isbn = {978-0-13-336165-0}, + abstract = {Presents a thorough overview of the major topics of digital image processing, beginning with the basic mathematical tools needed for the subject. Includes a comprehensive chapter on stochastic models for digital image processing. Covers aspects of image representation including luminance, color, spatial and temporal properties of vision, and digitization. Explores various image processing techniques. 
Discusses algorithm development (software/firmware) for image transforms, enhancement, reconstruction, and image coding.}, + language = {en}, + publisher = {Prentice Hall}, + author = {Jain, Anil K.}, + year = {1989}, + note = {Google-Books-ID: GANSAAAAMAAJ}, + keywords = {Computers / Image Processing, Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Technology \& Engineering / Signals \& Signal Processing, Science / Physics / Optics \& Light, Technology \& Engineering / Electrical, Technology \& Engineering / Telecommunications}, +} + +@book{jain_fundamentals_1989-1, + title = {Fundamentals of digital image processing}, + isbn = {978-0-13-336165-0}, + url = {http://archive.org/details/fundamentalsofdi0000jain}, + abstract = {xxi, 569 p. : 24 cm; Includes bibliographical references and index}, + language = {eng}, + urldate = {2025-02-09}, + publisher = {Englewood Cliffs, NJ : Prentice Hall}, + author = {Jain, Anil K.}, + collaborator = {{Internet Archive}}, + year = {1989}, + keywords = {Image processing -- Digital techniques}, +} + +@book{russ_image_2016, + title = {The {Image} {Processing} {Handbook}}, + isbn = {978-1-4398-4063-4}, + abstract = {Whether obtained by microscopes, space probes, or the human eye, the same basic tools can be applied to acquire, process, and analyze the data contained in images. Ideal for self study, The Image Processing Handbook, Sixth Edition, first published in 1992, raises the bar once again as the gold-standard reference on this subject. Using extensive new illustrations and diagrams, it offers a logically organized exploration of the important relationship between 2D images and the 3D structures they reveal. Provides Hundreds of Visual Examples in FULL COLOR! 
The author focuses on helping readers visualize and compare processing and measurement operations and how they are typically combined in fields ranging from microscopy and astronomy to real-world scientific, industrial, and forensic applications. Presenting methods in the order in which they would be applied in a typical workflow—from acquisition to interpretation—this book compares a wide range of algorithms used to: Improve the appearance, printing, and transmission of an image Prepare images for measurement of the features and structures they reveal Isolate objects and structures, and measure their size, shape, color, and position Correct defects and deal with limitations in images Enhance visual content and interpretation of details This handbook avoids dense mathematics, instead using new practical examples that better convey essential principles of image processing. This approach is more useful to develop readers’ grasp of how and why to apply processing techniques and ultimately process the mathematical foundations behind them. Much more than just an arbitrary collection of algorithms, this is the rare book that goes beyond mere image improvement, presenting a wide range of powerful example images that illustrate techniques involved in color processing and enhancement. Applying his 50-year experience as a scientist, educator, and industrial consultant, John Russ offers the benefit of his image processing expertise for fields ranging from astronomy and biomedical research to food science and forensics. 
His valuable insights and guidance continue to make this handbook a must-have reference.}, + language = {en}, + publisher = {CRC Press}, + author = {Russ, John C.}, + month = apr, + year = {2016}, + note = {Google-Books-ID: gxXXRJWfEsoC}, + keywords = {Computers / Optical Data Processing, Technology \& Engineering / Imaging Systems, Computers / General, Medical / Biotechnology, Technology \& Engineering / Biomedical}, +} + +@book{bradski_learning_2008, + title = {Learning {OpenCV} : computer vision with the {OpenCV} library}, + isbn = {978-0-596-51613-0}, + shorttitle = {Learning {OpenCV}}, + url = {http://archive.org/details/learningopencvco0000brad}, + abstract = {xvii, 555 pages : 24 cm; Learning OpenCV puts you in the middle of the rapidly expanding field of computer vision. Written by the creators of the free open source OpenCV library, this book introduces you to computer vision and demonstrates how you can quickly build applications that enable computers to "see" and make decisions based on that data. Computer vision is everywhere-in security systems, manufacturing inspection systems, medical image analysis, Unmanned Aerial Vehicles, and more. It stitches Google maps and Google Earth together, checks the pixels on LCD screens, and makes sure the stitches in your shirt are sewn properly. 
OpenCV provides an easy-to-use computer vision framework and a comprehensive library with more than 500 functions that can run vision code in real time; Includes bibliographical references (pages 527-541) and index; Overview -- Introduction to OpenCV -- Getting to know OpenCV -- HighGUI -- Image processing -- Image transforms -- Histograms and matching -- Contours -- Image parts and segmentation -- Tracking and motion -- Camera models and calibration -- Projection and 3D vision -- Machine learning -- OpenCV's future}, + language = {eng}, + urldate = {2025-03-23}, + publisher = {Sebastopol, CA : O'Reilly}, + author = {Bradski, Gary R.}, + collaborator = {{Internet Archive}}, + year = {2008}, + keywords = {OpenCV}, +} + +@incollection{szeliski_image_2022, + address = {Cham}, + title = {Image {Processing}}, + isbn = {978-3-030-34372-9}, + url = {https://doi.org/10.1007/978-3-030-34372-9_3}, + abstract = {Now that we have seen how images are formed through the interaction of 3D scene elements, lighting, and camera optics and sensors, let us look at the first stage in most computer vision algorithms, namely the use of image processing to preprocess the image and convert it into a form suitable for further analysis. 
Examples of such operations include exposure correction and color balancing, reducing image noise, increasing sharpness, or straightening the image by rotating it.}, + language = {en}, + urldate = {2025-03-23}, + booktitle = {Computer {Vision}: {Algorithms} and {Applications}}, + publisher = {Springer International Publishing}, + author = {Szeliski, Richard}, + editor = {Szeliski, Richard}, + year = {2022}, + doi = {10.1007/978-3-030-34372-9_3}, + pages = {85--151}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\4E9CTSBC\\Szeliski - 2022 - Image Processing.pdf:application/pdf}, +} + +@incollection{szeliski_introduction_2022, + address = {Cham}, + title = {Introduction}, + isbn = {978-3-030-34372-9}, + url = {https://doi.org/10.1007/978-3-030-34372-9_1}, + abstract = {As humans, we perceive the three-dimensional structure of the world around us with apparent ease. Think of how vivid the three-dimensional percept is when you look at a vase of flowers sitting on the table next to you.}, + language = {en}, + urldate = {2025-03-23}, + booktitle = {Computer {Vision}: {Algorithms} and {Applications}}, + publisher = {Springer International Publishing}, + author = {Szeliski, Richard}, + editor = {Szeliski, Richard}, + year = {2022}, + doi = {10.1007/978-3-030-34372-9_1}, + pages = {1--26}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\AWW7TKMC\\Szeliski - 2022 - Introduction.pdf:application/pdf}, +} + +@misc{noauthor_computer_2010, + title = {Computer {Vision}: {Algorithms} and {Applications}}, + shorttitle = {Computer {Vision}}, + url = {https://scispace.com/papers/computer-vision-algorithms-and-applications-25dn6wu83j}, + abstract = {Humans perceive the three-dimensional structure of the world with apparent ease. However, despite all of the recent advances in computer vision research, the dream of having a computer interpret an image at the same level as a two-year old remains elusive. 
Why is computer vision such a challenging problem and what is the current state of the art? Computer Vision: Algorithms and Applications explores the variety of techniques commonly used to analyze and interpret images. It also describes challenging real-world applications where vision is being successfully used, both for specialized applications such as medical imaging, and for fun, consumer-level tasks such as image editing and stitching, which students can apply to their own personal photos and videos. More than just a source of recipes, this exceptionally authoritative and comprehensive textbook/reference also takes a scientific approach to basic vision problems, formulating physical models of the imaging process before inverting them to produce descriptions of a scene. These problems are also analyzed using statistical models and solved using rigorous engineering techniques Topics and features: structured to support active curricula and project-oriented courses, with tips in the Introduction for using the book in a variety of customized courses; presents exercises at the end of each chapter with a heavy emphasis on testing algorithms and containing numerous suggestions for small mid-term projects; provides additional material and more detailed mathematical topics in the Appendices, which cover linear algebra, numerical techniques, and Bayesian estimation theory; suggests additional reading at the end of each chapter, including the latest research in each sub-field, in addition to a full Bibliography at the end of the book; supplies supplementary course material for students at the associated website, http://szeliski.org/Book/. Suitable for an upper-level undergraduate or graduate-level course in computer science or engineering, this textbook focuses on basic techniques that work under real-world conditions and encourages students to push their creative boundaries. 
Its design and exposition also make it eminently suitable as a unique reference to the fundamental techniques and current research literature in computer vision.}, + language = {en}, + urldate = {2025-03-23}, + journal = {SciSpace - Paper}, + month = sep, + year = {2010}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\BG7IZ622\\2010 - Computer Vision Algorithms and Applications.pdf:application/pdf}, +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +@article{cooley_algorithm_nodate, + title = {An {Algorithm} for the {Machine} {Calculation} of {Complex} {Fourier} {Series}}, + language = {en}, + author = {Cooley, James W and Tukey, John W}, + file = {PDF:C\:\\Users\\SFI19\\Zotero\\storage\\MCLKRX9H\\Cooley and Tukey - An Algorithm for the Machine Calculation of Complex Fourier Series.pdf:application/pdf}, +} + +@article{cooley_algorithm_1965, + title = {An {Algorithm} for the {Machine} {Calculation} of {Complex} {Fourier} {Series}}, + volume = {19}, + issn = {0025-5718}, + url = {https://www.jstor.org/stable/2003354}, + doi = {10.2307/2003354}, + number = {90}, + urldate = {2025-03-23}, + journal = {Mathematics of Computation}, + author = {Cooley, James W. and Tukey, John W.}, + year = {1965}, + note = {Publisher: American Mathematical Society}, + pages = {297--301}, + file = {Full Text:C\:\\Users\\SFI19\\Zotero\\storage\\DJ3PD27D\\Cooley and Tukey - 1965 - An Algorithm for the Machine Calculation of Complex Fourier Series.pdf:application/pdf}, +} + +@article{hounsfield_computerized_1973, + title = {Computerized transverse axial scanning (tomography): {Part} 1. 
{Description} of system}, + volume = {46}, + issn = {0007-1285}, + shorttitle = {Computerized transverse axial scanning (tomography)}, + url = {https://doi.org/10.1259/0007-1285-46-552-1016}, + doi = {10.1259/0007-1285-46-552-1016}, + abstract = {This article describes a technique in which X-ray transmission readings are taken through the head at a multitude of angles: from these data, absorption values of the material contained within the head are calculated on a computer and presented as a series of pictures of slices of the cranium. The system is approximately 100 times more sensitive than conventional X-ray systems to such an extent that variations in soft tissues of nearly similar density can be displayed.}, + number = {552}, + urldate = {2025-03-23}, + journal = {British Journal of Radiology}, + author = {Hounsfield, G. N.}, + month = dec, + year = {1973}, + pages = {1016--1022}, + file = {Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\Q676YM6Q\\7306149.html:text/html}, +} + +@article{lecun_deep_2015, + title = {Deep learning}, + volume = {521}, + issn = {1476-4687}, + doi = {10.1038/nature14539}, + abstract = {Deep learning allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. These methods have dramatically improved the state-of-the-art in speech recognition, visual object recognition, object detection and many other domains such as drug discovery and genomics. Deep learning discovers intricate structure in large data sets by using the backpropagation algorithm to indicate how a machine should change its internal parameters that are used to compute the representation in each layer from the representation in the previous layer. Deep convolutional nets have brought about breakthroughs in processing images, video, speech and audio, whereas recurrent nets have shone light on sequential data such as text and speech. 
(PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + number = {7553}, + journal = {Nature}, + author = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey}, + year = {2015}, + note = {Place: United Kingdom +Publisher: Nature Publishing Group}, + keywords = {Algorithms, Computational Modeling, Machine Learning, Object Recognition}, + pages = {436--444}, +} + +@misc{hinton_improving_2012, + title = {Improving neural networks by preventing co-adaptation of feature detectors}, + url = {http://arxiv.org/abs/1207.0580}, + doi = {10.48550/arXiv.1207.0580}, + abstract = {When a large feedforward neural network is trained on a small training set, it typically performs poorly on held-out test data. This "overfitting" is greatly reduced by randomly omitting half of the feature detectors on each training case. This prevents complex co-adaptations in which a feature detector is only helpful in the context of several other specific feature detectors. Instead, each neuron learns to detect a feature that is generally helpful for producing the correct answer given the combinatorially large variety of internal contexts in which it must operate. Random "dropout" gives big improvements on many benchmark tasks and sets new records for speech and object recognition.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Hinton, Geoffrey E. and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R.}, + month = jul, + year = {2012}, + note = {arXiv:1207.0580 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\25BYMHFC\\Hinton et al. 
- 2012 - Improving neural networks by preventing co-adaptation of feature detectors.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\HIDVZ7NV\\1207.html:text/html}, +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +@article{zhang_efficient_2023, + title = {An efficient lightweight convolutional neural network for industrial surface defect detection}, + volume = {56}, + issn = {1573-7462}, + url = {https://doi.org/10.1007/s10462-023-10438-y}, + doi = {10.1007/s10462-023-10438-y}, + abstract = {Since surface defect detection is significant to ensure the utility, integrality, and security of productions, and it has become a key issue to control the quality of industrial products, which arouses interests of researchers. However, deploying deep convolutional neural networks (DCNNs) on embedded devices is very difficult due to limited storage space and computational resources. In this paper, an efficient lightweight convolutional neural network (CNN) model is designed for surface defect detection of industrial productions in the perspective of image processing via deep learning. By combining the inverse residual architecture with coordinate attention (CA) mechanism, a coordinate attention mobile (CAM) backbone network is constructed for feature extraction. Then, in order to solve the small object detection problem, the multi-scale strategy is developed by introducing the CA into the cross-layer information flow to improve the quality of feature extraction and augment the representation ability on multi-scale features. Hereafter, the multi-scale feature is integrated to design a novel bidirectional weighted feature pyramid network (BWFPN) to improve the model detection accuracy without increasing much computational burden. 
From the comparative experimental results on open source datasets, the effectiveness of the developed lightweight CNN is evaluated, and the detection accuracy attains on par with the state-of-the-art (SOTA) model with less parameters and calculation.}, + language = {en}, + number = {9}, + urldate = {2025-03-23}, + journal = {Artificial Intelligence Review}, + author = {Zhang, Dehua and Hao, Xinyuan and Wang, Dechen and Qin, Chunbin and Zhao, Bo and Liang, Linlin and Liu, Wei}, + month = sep, + year = {2023}, + keywords = {Artificial Intelligence, Attention mechanism, Feature pyramid networks, Lightweight convolutional neural networks, Surface defect detection}, + pages = {10651--10677}, + file = {Full Text PDF:C\:\\Users\\SFI19\\Zotero\\storage\\EJQ8PAKB\\Zhang et al. - 2023 - An efficient lightweight convolutional neural network for industrial surface defect detection.pdf:application/pdf}, +} + +@article{litjens_survey_2017, + title = {A {Survey} on {Deep} {Learning} in {Medical} {Image} {Analysis}}, + volume = {42}, + issn = {13618415}, + url = {http://arxiv.org/abs/1702.05747}, + doi = {10.1016/j.media.2017.07.005}, + abstract = {Deep learning algorithms, in particular convolutional networks, have rapidly become a methodology of choice for analyzing medical images. This paper reviews the major deep learning concepts pertinent to medical image analysis and summarizes over 300 contributions to the field, most of which appeared in the last year. We survey the use of deep learning for image classification, object detection, segmentation, registration, and other tasks and provide concise overviews of studies per application area. Open challenges and directions for future research are discussed.}, + urldate = {2025-03-23}, + journal = {Medical Image Analysis}, + author = {Litjens, Geert and Kooi, Thijs and Bejnordi, Babak Ehteshami and Setio, Arnaud Arindra Adiyoso and Ciompi, Francesco and Ghafoorian, Mohsen and Laak, Jeroen A. W. M. 
van der and Ginneken, Bram van and Sánchez, Clara I.}, + month = dec, + year = {2017}, + note = {arXiv:1702.05747 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + pages = {60--88}, + annote = {Comment: Revised survey includes expanded discussion section and reworked introductory section on common deep architectures. Added missed papers from before Feb 1st 2017}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\DT6DHLHY\\Litjens et al. - 2017 - A Survey on Deep Learning in Medical Image Analysis.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\DSJ5RKP6\\1702.html:text/html}, +} + +@article{maimaitijiang_soybean_2020, + title = {Soybean yield prediction from {UAV} using multimodal data fusion and deep learning}, + url = {https://www.academia.edu/84238554/Soybean_yield_prediction_from_UAV_using_multimodal_data_fusion_and_deep_learning}, + abstract = {Preharvest crop yield prediction is critical for grain policy making and food security. Early estimation of yield at field or plot scale also contributes to high-throughput plant phenotyping and precision agriculture. 
New developments in Unmanned}, + urldate = {2025-03-23}, + journal = {Remote Sensing of Environment}, + author = {Maimaitijiang, Maitiniyazi}, + month = jan, + year = {2020}, + file = {PDF:C\:\\Users\\SFI19\\Zotero\\storage\\PB6J69JW\\Maimaitijiang - 2020 - Soybean yield prediction from UAV using multimodal data fusion and deep learning.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\SYJLAK92\\Soybean_yield_prediction_from_UAV_using_multimodal_data_fusion_and_deep_learning.html:text/html}, +} + +@misc{janai_computer_2021, + title = {Computer {Vision} for {Autonomous} {Vehicles}: {Problems}, {Datasets} and {State} of the {Art}}, + shorttitle = {Computer {Vision} for {Autonomous} {Vehicles}}, + url = {http://arxiv.org/abs/1704.05519}, + doi = {10.48550/arXiv.1704.05519}, + abstract = {Recent years have witnessed enormous progress in AI-related fields such as computer vision, machine learning, and autonomous vehicles. As with any rapidly growing field, it becomes increasingly difficult to stay up-to-date or enter the field as a beginner. While several survey papers on particular sub-problems have appeared, no comprehensive survey on problems, datasets, and methods in computer vision for autonomous vehicles has been published. This book attempts to narrow this gap by providing a survey on the state-of-the-art datasets and techniques. Our survey includes both the historically most relevant literature as well as the current state of the art on several specific topics, including recognition, reconstruction, motion estimation, tracking, scene understanding, and end-to-end learning for autonomous driving. Towards this goal, we analyze the performance of the state of the art on several challenging benchmarking datasets, including KITTI, MOT, and Cityscapes. Besides, we discuss open problems and current research challenges. 
To ease accessibility and accommodate missing references, we also provide a website that allows navigating topics as well as methods and provides additional information.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Janai, Joel and Güney, Fatma and Behl, Aseem and Geiger, Andreas}, + month = mar, + year = {2021}, + note = {arXiv:1704.05519 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Robotics}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\J5BRT4MJ\\Janai et al. - 2021 - Computer Vision for Autonomous Vehicles Problems, Datasets and State of the Art.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\PTNC6R8L\\1704.html:text/html}, +} + +@misc{ren_faster_2016, + title = {Faster {R}-{CNN}: {Towards} {Real}-{Time} {Object} {Detection} with {Region} {Proposal} {Networks}}, + shorttitle = {Faster {R}-{CNN}}, + url = {http://arxiv.org/abs/1506.01497}, + doi = {10.48550/arXiv.1506.01497}, + abstract = {State-of-the-art object detection networks depend on region proposal algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN have reduced the running time of these detection networks, exposing region proposal computation as a bottleneck. In this work, we introduce a Region Proposal Network (RPN) that shares full-image convolutional features with the detection network, thus enabling nearly cost-free region proposals. An RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by Fast R-CNN for detection. We further merge RPN and Fast R-CNN into a single network by sharing their convolutional features---using the recently popular terminology of neural networks with 'attention' mechanisms, the RPN component tells the unified network where to look. 
For the very deep VGG-16 model, our detection system has a frame rate of 5fps (including all steps) on a GPU, while achieving state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015 competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning entries in several tracks. Code has been made publicly available.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + month = jan, + year = {2016}, + note = {arXiv:1506.01497 [cs]}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + annote = {Comment: Extended tech report}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\PKY5AU96\\Ren et al. - 2016 - Faster R-CNN Towards Real-Time Object Detection with Region Proposal Networks.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\X8PBIK44\\1506.html:text/html}, +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +@misc{abadi_tensorflow_2016, + title = {{TensorFlow}: {A} system for large-scale machine learning}, + shorttitle = {{TensorFlow}}, + url = {http://arxiv.org/abs/1605.08695}, + doi = {10.48550/arXiv.1605.08695}, + abstract = {TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. TensorFlow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs, and custom designed ASICs known as Tensor Processing Units (TPUs). This architecture gives flexibility to the application developer: whereas in previous "parameter server" designs the management of shared state is built into the system, TensorFlow enables developers to experiment with novel optimizations and training algorithms. 
TensorFlow supports a variety of applications, with particularly strong support for training and inference on deep neural networks. Several Google services use TensorFlow in production, we have released it as an open-source project, and it has become widely used for machine learning research. In this paper, we describe the TensorFlow dataflow model in contrast to existing systems, and demonstrate the compelling performance that TensorFlow achieves for several real-world applications.}, + urldate = {2025-03-23}, + publisher = {arXiv}, + author = {Abadi, Martín and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and Kudlur, Manjunath and Levenberg, Josh and Monga, Rajat and Moore, Sherry and Murray, Derek G. and Steiner, Benoit and Tucker, Paul and Vasudevan, Vijay and Warden, Pete and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang}, + month = may, + year = {2016}, + note = {arXiv:1605.08695 [cs]}, + keywords = {Computer Science - Artificial Intelligence, Computer Science - Distributed, Parallel, and Cluster Computing}, + annote = {Comment: 18 pages, 9 figures; v2 has a spelling correction in the metadata}, + file = {Preprint PDF:C\:\\Users\\SFI19\\Zotero\\storage\\ND7JHGWD\\Abadi et al. - 2016 - TensorFlow A system for large-scale machine learning.pdf:application/pdf;Snapshot:C\:\\Users\\SFI19\\Zotero\\storage\\7IRZAXVR\\1605.html:text/html}, +} + + +@book{ragan-kelley_halide_2013, + title = {Halide: {A} {Language} and {Compiler} for {Optimizing} {Parallelism}, {Locality}, and {Recomputation} in {Image} {Processing} {Pipelines}}, + volume = {48}, + shorttitle = {Halide}, + abstract = {Image processing pipelines combine the challenges of stencil computations and stream programs. They are composed of large graphs of different stencil stages, as well as complex reductions, and stages with global or data-dependent access patterns. 
Because of their complex structure, the performance difference between a naive implementation of a pipeline and an optimized one is often an order of magnitude. Efficient implementations require optimization of both parallelism and locality, but due to the nature of stencils, there is a fundamental tension between parallelism, locality, and introducing redundant recomputation of shared values. +We present a systematic model of the tradeoff space fundamental to stencil pipelines, a schedule representation which describes concrete points in this space for each stage in an image processing pipeline, and an optimizing compiler for the Halide image processing language that synthesizes high performance implementations from a Halide algorithm and a schedule. Combining this compiler with stochastic search over the space of schedules enables terse, composable programs to achieve state-of-the-art performance on a wide range of real image processing pipelines, and across different hardware architectures, including multicores with SIMD, and heterogeneous CPU+GPU execution. From simple Halide programs written in a few hours, we demonstrate performance up to 5x faster than hand-tuned C, intrinsics, and CUDA implementations optimized by experts over weeks or months, for image processing applications beyond the reach of past automatic compilers.}, + author = {Ragan-Kelley, Jonathan and Barnes, Connelly and Adams, Andrew and Paris, Sylvain and Durand, Frédo and Amarasinghe, Saman}, + month = jun, + year = {2013}, + doi = {10.1145/2499370.2462176}, + note = {Journal Abbreviation: ACM SIGPLAN Notices +Pages: 530 +Publication Title: ACM SIGPLAN Notices}, + file = {Full Text:C\:\\Users\\SFI19\\Zotero\\storage\\62D2CBIL\\Ragan-Kelley et al. 
- 2013 - Halide A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Pro.pdf:application/pdf}, +} + +@book{russell_artificial_2016, + address = {Boston}, + edition = {Third edition, Global edition}, + title = {Artificial intelligence a modern approach}, + isbn = {978-1-292-15396-4}, + url = {http://www.gbv.de/dms/tib-ub-hannover/848811429.pdf}, + abstract = {Hier auch später erschienene, unveränderte Nachdrucke}, + urldate = {2025-03-23}, + publisher = {Pearson}, + author = {Russell, Stuart J. and Norvig, Peter and Davis, Ernest and Edwards, Douglas}, + year = {2016}, + keywords = {Artificial intelligence, Künstliche Intelligenz, Precht, Richard David}, + file = {Artificial Intelligence-A Modern Approach (3rd Edition) ( PDFDrive ).pdf:C\:\\Users\\SFI19\\Zotero\\storage\\MX8PZ6JQ\\Artificial Intelligence-A Modern Approach (3rd Edition) ( PDFDrive ).pdf:application/pdf}, +} +