How can I store back the value to memory so I can loop over them? I tried couple of times but it's not working. Unable to loop them. What can I do?
.data
'by rowmajor order'
mA: .float 1.5,2.5,3.5,4.5
'by rowmajor order'
mB: .float 5.5,6.5,7.5,8.5
'4 elements in resultant matrix mC, each 4 bytes'
mC: .float 0,0,0,0
space: .asciiz " "
.text
'a3 stores byte size of resultant matrix mC'
la $a3,mC
'a1 stores i/p matrix mA'
la $a1,mA
la $a2,mB
'a'
l.s $f1, 0($a1)
'b'
l.s $f2, 4($a1)
'c'
l.s $f3, 8($a1)
'd'
l.s $f4, 12($a1)
l.s $f5, 0($a2)
'e'
l.s $f6, 4($a2)
'f'
l.s $f7, 8($a2)
'g'
l.s $f8, 12($a2)
'Here using the Strassen's Algorithm for matrix multiplication
add.s $f0,$f1,$f4 #a+d
add.s $f9,$f5,$f8 #e+h
mul.s $f0, $f0,$f9 # p5
sub.s $f9,$f7,$f5 #ge
mul.s $f9,$f4,$f9 #d(ge)  p4
add.s $f10,$f1,$f2 # a+b
mul.s $f10,$f8,$f10 #h(a+b)  p2
sub.s $f11,$f2,$f4 # (bd)
add.s $f12,$f7,$f8 #(g+h)
mul.s $f13,$f11,$f12 #p6
add.s $f11, $f0,$f9 # p5+p4
add.s $f12,$f11,$f13 #p5+p4+p6
sub.s $f11,$f12,$f10 #p5+p4+p6p2
'Values stored in the first cell of the resultant matrix'
s.s $f11,0($a3)
#... p1,p2
sub.s $f13,$f6,$f8 # (fh)
mul.s $f12,$f13,$f1 #a(fh) $f12 p1
add.s $f13, $f12,$f10 # (p1+p2)
s.s $f13, 4($a3)
add.s $f14,$f3,$f4 # c+d
mul.s $f15,$f14,$f5 #e(c+d)
add.s $f14,$f15,$f9 #p3+p4
s.s $f14,8($a3)
sub.s $f16,$f1,$f3 '(ac)'
add.s $f17,$f5,$f6 '(e+f)'
mul.s $f16,$f16,$f17 '(ac)*(e+f)  p7'
add.s $f17,$f12,$f0 '(p1+p5)'
sub.s $f18,$f17,$f15 '(p1+p5p3)'
sub.s $f17,$f18,$f16 '(p1+p5p3p7)'
s.s $f17,12($a3)
'Print the resultant matrix'
li $t1,0
loop: beq $t1, 4, exit
'PUT YOUR CODE HERE TO PRINT THE RESULT MATRIX'
j loop
exit:
li $v0,10
syscall
See also questions close to this topic

Why is _mm512_store_pd super slow in this matrix multiplication code?
I'm playing with avx512 and matrix multiplications but I must be doing something wrong because I have awful performances when I try to store my results using _mm512_store_pd.
Here are the relevant snippets of code, first the data structure I'm using and how I initialize it:
typedef struct { double* values; int nb_l; int nb_c; } matrix; matrix* alloc_matrix(int nb_l, int nb_c){ matrix* tmp_matrix = (matrix*)malloc(sizeof(matrix)); tmp_matrix>values = (double*)aligned_alloc(64, sizeof(double) * nb_l * nb_c); tmp_matrix>nb_l = nb_l; tmp_matrix>nb_c = nb_c; return tmp_matrix; }
And here is how I'm trying to multiply two matrices initialized elsewhere in my code:
matrix* mult_matrix(matrix* A, matrix* B){ /* avx512 */ matrix* res_matrix = zero_matrix(A>nb_l, B>nb_c); double* res_ptr; // start index of the current line in res_matrix double* B_ptr; // start index of the current line in B __m512d A_broadcast, B_l_8, res_ptr_8; for (unsigned int idx_A = 0; idx_A < A>nb_l * A> nb_c; idx_A++){ // broadcast current value of A eight times A_broadcast = _mm512_set1_pd(A>values[idx_A]); res_ptr = res_matrix>values + (idx_A / A>nb_c) * B>nb_c; B_ptr = B>values + (idx_A % A>nb_c) * B>nb_c; for (unsigned int offset_B = 0; offset_B < B>nb_c; offset_B+=8){ B_l_8 = _mm512_load_pd(&B_ptr[offset_B]); res_ptr_8 = _mm512_load_pd(&res_ptr[offset_B]); _mm512_store_pd( &res_ptr[offset_B] , _mm512_fmadd_pd(A_broadcast, B_l_8, res_ptr_8) ); } } return res_matrix;
The results are OK but _mm512_store_pd takes ~90% of the execution time, actually this avx512 code is barely faster than its non avx version.
I've tried everything I could think of but I'm can't find why I have such disappointing performances with this code. Do you have any idea ?
Thanks.
EDIT 1
Here is the non avx code
matrix* res_matrix = zero_matrix(A>nb_l, B>nb_c); double* res_ptr; // start index of the current line in res_matrix double* B_ptr; // start index of the current line in B for (unsigned int idx_A = 0; idx_A < A>nb_l * A> nb_c; idx_A++){ res_ptr = res_matrix>values + (idx_A / A>nb_c) * B>nb_c; B_ptr = B>values + (idx_A % A>nb_c) * B>nb_c; for (unsigned int offset_B = 0; offset_B < B>nb_c; offset_B++){ res_ptr[offset_B] += A>values[idx_A] * B_ptr[offset_B]; } } return res_matrix;
All matrices are 512x512 random matrices, each multiplication is repeated 50 times and the running time is averaged.
Finally the snippet below should be OK in order to test avx and non_avx versions of my code. I've compiled it with gcc 8.3.0 using the following options: gcc Ofast mavx mavx512f m64 mfpmath=sse mfma flto funrollloops matrix_minimal.c
#include <stdio.h> #include <stdlib.h> #include <sys/time.h> #include <time.h> #include <string.h> #include <immintrin.h> typedef struct { double* values; int nb_l; int nb_c; } matrix; matrix* alloc_matrix(int nb_l, int nb_c){ matrix* tmp_matrix = (matrix*)malloc(sizeof(matrix)); tmp_matrix>values = (double*)aligned_alloc(64, sizeof(double) * nb_l * nb_c); tmp_matrix>nb_l = nb_l; tmp_matrix>nb_c = nb_c; return tmp_matrix; } void free_matrix(matrix** to_free){ free((*to_free)>values); free(*to_free); } matrix* zero_matrix(int nb_l, int nb_c){ matrix* z_matrix; z_matrix = alloc_matrix(nb_l, nb_c); for (int idx=0; idx < nb_l * nb_c; idx++){ z_matrix>values[idx] = 0.0; } return z_matrix; } matrix* rand_matrix(int nb_l, int nb_c, double max_abs_val){ static struct timeval seed; //static variables are zeroed at initialization matrix* rnd_matrix; rnd_matrix = alloc_matrix(nb_l, nb_c); if (seed.tv_sec == 0){ //ts_sec will never be zero after gettimeofday, whereas tv_usec could gettimeofday(&seed, NULL); srand((unsigned) seed.tv_usec); } for (int idx=0; idx < nb_l * nb_c; idx++){ rnd_matrix>values[idx] = max_abs_val * ((double)rand() / RAND_MAX * 2.0  1.0); } return rnd_matrix; } matrix* mult_matrix_avx(matrix* A, matrix* B){ /* pas trop mal en avx512 */ matrix* res_matrix = zero_matrix(A>nb_l, B>nb_c); double* res_ptr; // start index of the current line in res_matrix double* B_ptr; // start index of the current line in B __m512d A_broadcast, B_l_8, res_ptr_8; for (unsigned int idx_A = 0; idx_A < A>nb_l * A> nb_c; idx_A++){ A_broadcast = _mm512_set1_pd(A>values[idx_A]); // broadcast current value of A eight times res_ptr = res_matrix>values + (idx_A / A>nb_c) * B>nb_c; B_ptr = B>values + (idx_A % A>nb_c) * B>nb_c; for (unsigned int offset_B = 0; offset_B < B>nb_c; offset_B+=8){ B_l_8 = _mm512_load_pd(&B_ptr[offset_B]); res_ptr_8 = _mm512_load_pd(&res_ptr[offset_B]); _mm512_store_pd(&res_ptr[offset_B] , _mm512_fmadd_pd(A_broadcast, B_l_8, res_ptr_8)); } } return res_matrix; } matrix* mult_matrix(matrix* A, matrix* B){ /* non avx512 */ matrix* res_matrix = zero_matrix(A>nb_l, B>nb_c); double* res_ptr; // start index of the current line in res_matrix double* B_ptr; // start index of the current line in B for (unsigned int idx_A = 0; idx_A < A>nb_l * A> nb_c; idx_A++){ res_ptr = res_matrix>values + (idx_A / A>nb_c) * B>nb_c; B_ptr = B>values + (idx_A % A>nb_c) * B>nb_c; for (unsigned int offset_B = 0; offset_B < B>nb_c; offset_B++){ res_ptr[offset_B] += A>values[idx_A] * B_ptr[offset_B]; } } return res_matrix; } int main(int argc, char *argv[]){ struct timeval before; struct timeval after; matrix* A = rand_matrix(512, 512, 5); matrix* B = rand_matrix(512, 512, 5); matrix *C; gettimeofday(&before, NULL); for (int j=0; j<50;j++){ C = mult_matrix_avx(A, B); free_matrix(&C); // we will measure the same overhead here and in the non avx version } gettimeofday(&after, NULL); double delta = ((after.tv_sec  before.tv_sec) * 1000000 + (after.tv_usec  before.tv_usec))/50; printf("avx %lf ms\n", delta); gettimeofday(&before, NULL); for (int j=0; j<50;j++){ C = mult_matrix(A, B); free_matrix(&C); } gettimeofday(&after, NULL); delta = ((after.tv_sec  before.tv_sec) * 1000000 + (after.tv_usec  before.tv_usec))/50; printf("non avx %lf ms\n", delta); free_matrix(&A); free_matrix(&B); return 0; }

EXC_BAD_ACCESS using twodimensional array in C++
so I'm rather new to c++ and tried to write the Levensthein Distance Algorithm as a function.
#include <iostream> size_t levensthein(std::string string1, std::string string2); size_t loopiloop(size_t x, size_t y, size_t z); int main() { std::string str1{}; std::string str2{}; std::cout << "First String: "; std::cin >> str1; std::cout << "Second String: "; std::cin >> str2; std::cout << "Amount of Steps from " << str1 << " to " << str2 << "\n"; std::cout << levensthein(str1, str2) << "\n"; return 0; } size_t levensthein(std::string string1, std::string string2){ size_t sizeStr1{string1.size()}; size_t sizeStr2{string2.size()}; size_t matrix[sizeStr1][sizeStr2]; for(size_t i = 0; i <= sizeStr1; i++){ matrix[i][0] = i; } for(size_t i = 0; i <= sizeStr2; i++){ matrix[0][i] = i; } for(size_t i = 1; i <= sizeStr1; i++){ for(size_t j = 1; j <= sizeStr2; j++){ if(string1[i1] == string2[j1]){ matrix[i][j] = mini(matrix[i1][j] + 1, matrix[i1][j1], matrix[i][j1] + 1); }else{ matrix[i][j] = mini(matrix[i1][j] + 1, matrix[i1][j1] + 1, matrix[i][j1] + 1); } } } return matrix[sizeStr1][sizeStr2]; } size_t mini(size_t x, size_t y, size_t z){ if(x <= y){ if(x <= z){ return x; } }else if(y < x){ if(y <= z){ return y; } } return z; }
Ignore my "mini"function, since I'm still learning I just wanted to write a minfunction myself.
Now my the program works fine with strings that are 1 character in length.
However using strings longer than 1 characters throws a "EXC_BAD_ACCESS"Error in the return statement of the levensthein function.
I've tried googling it but all I understood was that pointers are being used that don't exist anymore, however I don't see how pointers come into play in my program.
I hope you guys can help me out!

Julia: Can I update and store the same array within an iteration of a forloop?
I am trying to update an array with a forloop and store the "current" version of the array within the same iteration of the loop as follows:
struct store a::Float64 mat::AbstractArray end function foo(x::AbstractArray) m, n = size(x) col = Array{store}(undef, m, n) A = zeros(m, n) for i in eachindex(col) A[i] = 1.0 print(A) col[i] = store(x[i], A) A[i] = 0 end return col end
I added a print() to check if the array is updated in the way I want it (it is). The matrix I want to store has all zeros except for a "1" at the current position of the index. The result I get is:
foo(rand(2,2)) 2×2 Array{store,2}: store(0.447322, [0.0 0.0; 0.0 0.0]) store(0.949405, [0.0 0.0; 0.0 0.0]) store(0.56251, [0.0 0.0; 0.0 0.0]) store(0.156834, [0.0 0.0; 0.0 0.0])
It is possible to achieve what I want by placing the array "A" in the loop, but it is also very inefficient.
Is there better to do this?
Thanks!

Cannot compile robotjs for electron on my mips platform Linux
I am trying to use robotjs in my electron on a mips based Ubuntu. Since electron has mips linux version only up to version 1.8.8 (after that, no support), so I download this 1.8.8 electronmips version. Then, I need to install Node, by checking the node version inside electron 1.8.8, I found it is node8.2.1, so I download this version source code and compile it in my mips platform, seems OK, and node and npm can run successfully. After this, I install libxtstdev libpng++dev, and then download the source code from robotjs homepage and unzip it in my directory, and under this directory, "npm install nan".
Then, I follow the robotjs homepage and try to compile it for my platform with command like:
nodegyp rebuild runtime=electron target=1.8.8 disturl=https://atom.io/download/atomshell abi=57
During compiling, it always reports error(robotjs.cc, line 159):
no matching function for call to v8::String::Utf8Value bstr(...)
. And the same error occurs for line 447 and 486.I checked the line 159/447/486 in robotjs.cc, they are as follows:
159: v8::String::Utf8Value bstr(v8::Isolate::GetCurrent(), Nan::To<v8::String>(info[0]).ToLocalChecked()); 447: v8::String::Utf8Value fstr(v8::Isolate::GetCurrent(), Nan::To<v8::String>(value).ToLocalChecked()); 486: v8::String::Utf8Value kstr(v8::Isolate::GetCurrent(), Nan::To<v8::String>(info[0]).ToLocalChecked());*
Anyone knows what's wrong with this, how should I do? thanks a lot!!

When I swap multiplier(101100) and multiplicand(010111) , refined MIPS Multiplication is not producing correct answer?
Multiplicand 010111 Multiplier 101100 Then product is 001111110100. But when I swap the multiplier and multiplicand, the product is not correct

MARS MIPS simulator's builtin assembler aligns more than requested?
I have the following data segment
.data a: .byte 0x11 .align 1 b: .word 0x22334455
Assuming that address "a" is 0x10010000, then the expected address for the word at b is 0x10010002, but MARS stores the word at 0x10010004, ignoring the explicit ".align" directive. By the way, I used MARS MIPS simulator (Version 4.5 on a MacBook Pro) to assemble the above code.
Therefore, my question is: Is this a bug, or is it expected that the behavior of MARS differs from SGI's 1992 documentation for MIPS assembly language, e.g. Page 81 of this Pascal / Assembly manual?
(MARS and nonMARS MIPS asm docs agree that
.align
in MIPS syntax takes a powerof2 arg, so.align 1
aligns to a 2^1 = 2byte boundary. Unlike GAS / Unix assembler syntax for some other architectures where.align
= byte align, where an arg of1
would be redundant.) 
Are there any design patters that can only be used for dynamically typed languages and not for statically typed languages?
Since statically typed programming languages do type checking (verifying and enforcing the constraints of types) at compiletime, one has to ensure type consistency at compilation stage itself.
However, for a language like Python, we can leverage different behaviours for different data types at runtime by making use of functions like
isinstance()
What I want to know about is has anyone been able to come up with a design patter (even for a specific use case) that is not possible to be implemented in a statically typed programming language?

Can anybody suggest good books to get a good idea of programming?
I'm new to the programming world and I'm not a CSE student, but I'm highly interested in learning programming and computer and web technologies. Have basic knowledge of general computing and using contemporary internet tools.
Dream to be a developer.
Thanks Priyashis.

How can I include sound into a project on bluej by URL?
I have a project due tomorrow for school and I'm a little confused on how I add sound to it. Our class works on the bluej platform, and the sound needs to be added by URL and not by a file on our computer, so that when the teacher runs it he can hear the music. We are doing an applet project, if that makes any difference. I have seen many demonstrations online of how to add sound to a project, but none on how to add a sound from a URL, and I am desperate for help. Thanks

How do you use gcc to generate assembly code in MIPS syntax?
DevC++ uses AT&T assembly, and I want to convert that in MIPS. I can't find the command for the Windows 10 (64bit) that does that. Linux for example uses mipslinuxgnug++ march=mips32r2 S dll.c, I think. The file I want to convert is .cpp, if that helps.

File exists but shell can't launch the process
File exists on disk (and has execute permissions):
user@host# ls l netsniffng rwxrwxrwx 1 root root 269468 Jan 16 09:42 netsniffng
Yet, when I attempt to launch it:
user@host# ./netsniffng /bin/sh: ./netsniffng: not found
This is in
tmpfs
:user@host# mount tmpfs on /tmp type tmpfs (rw,relatime)
host details, if it is of any help:
user@host# cat /proc/cpuinfo system type : bull machine : Unknown processor : 0 cpu model : Ingenic Xburst V0.1 FPU V0.0 BogoMIPS : 858.52 wait instruction : yes microsecond timers : no tlb_entries : 32 extra interrupt vector : yes hardware watchpoint : yes, count: 1, address/irw mask: [0x0fff] isa : mips32r1 ASEs implemented : shadow register sets : 1 kscratch registers : 7 core : 0 VCED exceptions : not available VCEI exceptions : not available Hardware : isvp Serial : 00000000 00000000 00000000 00000000
How could this be happening? How do I debug why
sh
isn't finding the fileEDIT
 The file certainly exists (verified with
cat
,ls
)  File was cross compiled (Host: x86)
EDIT 2
 When a file doesn't exist and I execute the file, I get
not found
:
user@host# gcc /bin/sh: gcc: not found
 The file certainly exists (verified with

Faulty Implementation of Luhn Algorithm in MIPS
In my assignment I'm supposed to implement the Luhn Algorithm. As an argument I get a credit card number as an ASCII string, so I need to convert the characters to Integers. My program runs but the output is always "correct", even if the number is wrong or invalid.
Can anyone find any mistakes I wasn't able to find?
luhn: ## for Schleife: add $t5,$zero,$zero # sum = 0 add $t0,$zero,$zero # i= 0 addi $t7,$zero,16 # t7 = lng = 16 for: blt $t0,$t7,endfor # if (i<lng) go to endfor sub $t1,$t7,$t0 # $t1 = lng  i sub $t1,$t1,1 # $t1 = lng  i  1 sll $t2,$t1,2 # $t2 = 4*(lng  i  1) add $t2,$a0,$t2 # $t2 = a+4*(lng  i  1)= &a[lng  i  1] lw $t2,0($t2) # $t2 = a[lng  i  1] = digit # maybe load byte j int_converter end_int_converter: ## erste if andi $t3,$t0,1 # $t3 = i modulo 2 beq $t3, 1, if # if ($t0 == 1) go to if j endif # else go to endif if: mul $t2,$t2,2 # digit = digit * 2 ## zweite if bgt $t2,9,ifz # if (t2 > 9) go to ifz j endifz # else go to endifz ifz: sub $t2,$t2,9 # digit = digit  9 endifz: endif: add $t5,$t5,$t2 # sum = sum + digit addi $t0,$t0,1 # i++ j for # jump to for endfor: andi $v0,$t5,9 # t4 = sum modulo 10 jr $ra # return int_converter: bne $t2,32,not_space #if not space go to not_space sub $a0,$a0,1 # a0 = a0 1 j for # jump for not_space: bgt $t2,57,invalid #ist keine Zahl, ASCII grosser als 9 blt $t2,48,invalid #ist keine Zahl, ASCII kleiner als sub $t2,$t2,48 # z.B 8 = 56 (ASCII). 5648 = 8 j end_int_converter invalid: li $v0, 1 ## return 1 for error jr $ra ## jump back invalid: li $v0, 1 ## return 1 for error jr $ra ## jump back