A faster way to find out if any word in a list is a substring in a longer word
What I have
- A string
s
of length m
(where m > 3) - A huge list
L
of words
(length > 2)
What I want to know
- Is any
word
in L
a substring of s
Currently I have L
split up into files, one file each for each word-length 16.txt, 15.txt ... 04.txt
I then iterate over these files from n.txt --> 04.txt and basically do
cat n.txt | while read w; do if [[ $s =~ $w ]] ; then echo $w; fi; done
It's painfully slow; there has to be a better way to do this.
Additional info:
L
contains about 200k words and is fairly static, so I don't mind a complicated, time-consuming setup if it means greater speed. - There are several different such lists
L
but only one of them needs to be searched at a time.
I'm pretty language agnostic (see tags) but pseudo code is also fine
1 answer
-
answered 2021-02-22 22:47
choroba
You can use
grep
without splitting the list: grep -oFf list.txt <<< "$s"
-o
will only output the matching substrings. -F
will interpret the lines in list.txt as fixed strings, not regular expressions. -f
will tell grep what file to use as the source of the patterns to match. <<<
in bash takes the following word and makes it input to the preceding command
See also questions close to this topic
-
How to read currently downloading file -Js OR Python
Let's say that I'm downloading a file and at the same time I want to read it but that the file content always get updated. For example let's assuming that I start read the file when the progress bar was at 60% is there a way to read and wait at the same time for the remaining part which is the 40% of the file content that is currently downloading.
I tried the normal reading method in python:
with open('downloadingfile.mp4', 'rb') as f: data = f.read() # remaining code which is not important
-
Cumulative sum over a period of time
I have a dataframe with a Date column and some other columns. I need to calculate how many occurrences I have every day and create a cumulative sum. For example:
Date 2017-02-14 11:00:06 2017-02-14 14:10:31 2017-02-15 01:35:02 2017-02-18 21:10:25 2017-02-18 22:15:32 2017-02-18 23:13:55 2017-02-19 19:43:44 ...
should become:
Date Cum_Counts 2017-02-14 2 2017-02-15 3 2017-02-18 6 2017-02-19 7
I know that I can use
df['Date'].groupby(df['Date'].dt.day).count()
and get the counts per day. But I want:- the cumulative counts
- the date itself in the first column (as in the example above).
Is there a way to do this directly with groupby?
-
opencv stereoCalibrate giving wrong undistortion maps
I'm trying to calibrate images from the Waveshare IMX219-83 Stereo-Camera. The undistorted images are not correct and I can't get my head around why it doesn't work. The images are 3280 × 2464, but the numpy maps I save are all 2464 in length, which seems weird to me.
calibration code:
import cv2 import numpy as np import glob from tqdm import tqdm chessboard_size = (6,9) obj_points = [] img_points_l = [] img_points_r = [] objp = np.zeros((np.prod(chessboard_size),3),dtype=np.float32) objp[:,:2] = np.mgrid[0:chessboard_size[0], 0:chessboard_size[1]].T.reshape(-1,2) calib_paths_left = sorted(glob.glob('./calib/left/*')) calib_paths_right = sorted(glob.glob('./calib/right/*')) print(calib_paths_left) criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) for i in tqdm(list(range(len(calib_paths_left)))[0::1]): image_l = cv2.imread(calib_paths_left[i]) image_r = cv2.imread(calib_paths_right[i]) gray_image_l = cv2.cvtColor(image_l, cv2.COLOR_BGR2GRAY) gray_image_r = cv2.cvtColor(image_r, cv2.COLOR_BGR2GRAY) print("Images loaded, Analizying...") ret_l,corners_l = cv2.findChessboardCorners(gray_image_l, chessboard_size, None) ret_r,corners_r = cv2.findChessboardCorners(gray_image_r, chessboard_size, None) if ret_l and ret_r: print("Chessboard detected on both images!") print(calib_paths_left[i] + " and " + calib_paths_right[i]) obj_points.append(objp) corners_fine_l = cv2.cornerSubPix(gray_image_l,corners_l,(11,11),(-1,-1),criteria) corners_fine_r = cv2.cornerSubPix(gray_image_r,corners_r,(11,11),(-1,-1),criteria) img_points_l.append(corners_fine_l) img_points_r.append(corners_fine_r) output_l = image_l.copy() output_r = image_r.copy() cv2.drawChessboardCorners(output_l,(6,9),corners_fine_l,ret_l) cv2.drawChessboardCorners(output_r,(6,9),corners_fine_r,ret_r) cv2.imshow('corners_fine_l',output_l) cv2.imshow('corners_fine_r',output_r) cv2.waitKey(500) cv2.destroyAllWindows() # Calibrate cameras individually (undistort) ret_l, mtx_l, dist_l, rvecs_l, tvecs_l = cv2.calibrateCamera( obj_points, img_points_l,gray_image_l.shape[::-1], None, None) h_l,w_l= gray_image_l.shape[:2] new_mtx_l, roi_l= cv2.getOptimalNewCameraMatrix(mtx_l,dist_l,(w_l,h_l),1,(w_l,h_l)) ret_r, mtx_r, dist_r, rvecs_r, tvecs_r = cv2.calibrateCamera( obj_points, 
img_points_r,gray_image_r.shape[::-1], None, None) h_r,w_r= gray_image_l.shape[:2] new_mtx_r, roi_r= cv2.getOptimalNewCameraMatrix(mtx_r,dist_r,(w_r,h_r),1,(w_r,h_r)) # Calibrate cameras as stereocamera flags = 0 flags |= cv2.CALIB_FIX_INTRINSIC criteria_stereo= (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) retval_stereo, new_mtx_l, dist_l, new_mtx_r, dist_r, rot, trans, emat, fmat = cv2.stereoCalibrate( obj_points, img_points_l, img_points_r, new_mtx_l, dist_l, new_mtx_r, dist_r, gray_image_l.shape[::-1], criteria_stereo, flags) rectify_scale= 1 rect_l, rect_r, proj_mat_l, proj_mat_r, Q, roi_l, roi_r= cv2.stereoRectify( new_mtx_l, dist_l, new_mtx_r, dist_r, gray_image_l.shape[::-1], rot, trans, rectify_scale,(0,0)) Left_Stereo_Map= cv2.initUndistortRectifyMap(new_mtx_l, dist_l, rect_l, proj_mat_l, gray_image_l.shape[::-1], cv2.CV_16SC2) Right_Stereo_Map= cv2.initUndistortRectifyMap(new_mtx_r, dist_r, rect_r, proj_mat_r, gray_image_r.shape[::-1], cv2.CV_16SC2) print("Saving parameters ......") np.save("camera_params/Left_Stereo_Map_x", Left_Stereo_Map[0]) np.save("camera_params/Left_Stereo_Map_y", Left_Stereo_Map[1]) np.save("camera_params/Right_Stereo_Map_x", Right_Stereo_Map[0]) np.save("camera_params/Right_Stereo_Map_y", Right_Stereo_Map[1])
remapping the images, showing bad results
import cv2 import numpy as np Left_Stereo_Map_x = np.load("camera_params/Left_Stereo_Map_x.npy") Left_Stereo_Map_y = np.load("camera_params/Left_Stereo_Map_y.npy") Right_Stereo_Map_x = np.load("camera_params/Right_Stereo_Map_x.npy") Right_Stereo_Map_y = np.load("camera_params/Right_Stereo_Map_y.npy") image_l = cv2.imread("calib/left/frame_l_30.jpg") image_r = cv2.imread("calib/right/frame_r_30.jpg") gray_image_l = cv2.cvtColor(image_l, cv2.COLOR_BGR2GRAY) gray_image_r = cv2.cvtColor(image_r, cv2.COLOR_BGR2GRAY) cv2.imshow("Left image before rectification", gray_image_l) cv2.imshow("Right image before rectification", gray_image_r) image_l_undist = cv2.remap(image_l,Left_Stereo_Map_x,Left_Stereo_Map_y, cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0) image_r_undist = cv2.remap(image_r,Right_Stereo_Map_x,Right_Stereo_Map_y, cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0) cv2.imshow("Left image after rectification", image_l_undist) cv2.imshow("Right image after rectification", image_r_undist) cv2.waitKey(0)
These are the kind of images I calibrate with:
example left image for calibration
example right image for calibration
and this is the output i get:
I checked the code quite a few times now and can't get my head around why it doesn't work.
-
what is the correct character encoding for a bash script?
what is the correct character encoding for a bash script ?
my current file comes up in Gedit as Chinese chars. comes up in nano ok.
file -bi myfile.sh ... yields text/x-shellscript; charset=binary
not sure how I got here.
-
How to sync the modification date of folders within two directories that are the same?
I have a Dropbox folder on one computer with all the original modification dates. Recently, after transferring my data onto another computer, due to a .DS_Store issue, some of the folder's "Date Modified" dates were changed to today. I am trying to write a script that would take the original modification date of a folder, and then be able to find the corresponding folder in my new computer, and change it using
touch
. The idea is to usestat
andtouch -mt
to do this. Does anyone have any suggestions or better thoughts? Thanks. -
How to write a script for bash to archive a directory and keep its name?
For my uni I need to write a simple script that makes backups of few directories and move them to another directories. I'm very new to Linux so I'm kinda lost. Before that, they showed how to archive with tar and compress with gzip, so I'm assuming that's how I need to do those backups. It is also beginner level, so the script should be as simple as possible. Here's my script:
#!/bin/bash echo directories="work/ extract/ test/" for directory in $directories do tar --create --gzip --file= ~/backups "$directory".tgz $directory done
And that's the outcome:
pi@raspberry:~ $ ./my_backup.sh tar: Removing leading `/' from member names tar (child): : Cannot open: No such file or directory tar (child): Error is not recoverable: exiting now tar: : Cannot write: Broken pipe tar: Child returned status 2 tar: Error is not recoverable: exiting now tar: Removing leading `/' from member names tar (child): : Cannot open: No such file or directory tar (child): Error is not recoverable: exiting now tar: : Cannot write: Broken pipe tar: Child returned status 2 tar: Error is not recoverable: exiting now tar: Removing leading `/' from member names tar (child): : Cannot open: No such file or directory tar (child): Error is not recoverable: exiting now tar: : Cannot write: Broken pipe tar: Child returned status 2 tar: Error is not recoverable: exiting now
I can only guess it's something to do with the names, because '/' is not allowed in names, I think, but I don't know how to remove it. I asked this question on my uni's forum, but nobody answered. Thanks a lot for your help.
-
Solving the Rubiks cube white cross in python
so I'm starting to work on a Rubiks cube solver, and I was wondering if anyone could think of a good way to approach solving the white cross, as this is done intuitively in most tutorials, rather than with using algorithms.
There are a few approaches I have considered, such as a backtracking algorithm, BFS, or possibly some machine learning - my question is what would be the best way to approach this problem?
Im using https://pypi.org/project/rubik-cube/ to handle most of the heavy lifting on the cube side of things, my aim was to build an algorithm that uses the CFOP method, listed here: https://ruwix.com/the-rubiks-cube/advanced-cfop-fridrich/ only, as the white cross is intuitive, I was thinking there must be a better way to program it, rather than hard coding all possibilities.
Thanks :)
-
Q/Design and Analysis of Algorithm diagram?
Question 1 The Tower of Hanoi is a game in which a stack of disks of varying sizes is moved from one peg to another. The diagram below shows the start and goal states.
The rules of the game are: ● only one disk can be moved at a time ● a disk may only be moved if it is on the top of a stack ● disk can only be placed on an empty peg or a larger disk
a) Construct a recurrence relation showing required moves to transfer n disks from peg 1 to 3. b) Solve recurrence relation to obtain explicit formula for Tower of Hanoi problem.
Question 2 a) Write a program code to multiply two square matrices using programming language of your choice. b) Identify basic operation. c) Compute time complexity of your solution.
-
Permutation element of a list where one element is also permutated
I try to figure out how to deal when it comes to permuting elements from a list if we were considering one element of this list as a group inside a list where the index that is not used by the size of the list is a None value and return all the possible combinations like this:
elements = [1,2,4] perm_matrix(elements)
which gives this kind of output
[[1,2],4,None], [[1,2],None,4], [[1,4],2,None], [[1,4],None,2], [[4,2],1,None], [[4,2],None,1], [4,[1,2],None], [None,[1,2],4], [2,[1,4],None], [None,[1,4],2], [1,[4,2],None], [None,[4,2],1], [4,None,[1,2]], [None,4,[1,2]], [2,None,[1,4]], [None,2,[1,4]], [1,None,[4,2]], [None,1,[4,2]]
I tried to begin with
def perm(a, prev=None, first=True, final=None):
    """Return all permutations of the items in *a*, each as a list.

    Parameters
    ----------
    a : list
        Items to permute.
    prev, first, final
        Internal recursion accumulators; callers should not pass them.
        The call shape is backward-compatible with the original
        ``perm(a, prev=[], first=True, final=[])`` signature.

    Returns
    -------
    list[list]
        Every permutation of ``a`` (a single empty permutation for
        an empty input).
    """
    # ``None`` sentinels replace the original mutable default arguments
    # ([]), which are created once and shared across calls -- a classic
    # Python pitfall. The ``first`` flag in the original existed only to
    # work around that sharing; it is kept for signature compatibility.
    if prev is None:
        prev = []
    if first or final is None:
        final = []
    if not a:
        # Nothing left to place: ``prev`` holds one complete permutation.
        final.append(prev)
    for index, element in enumerate(a):
        # Recurse with ``element`` removed from the pool and appended
        # to the partial permutation built so far.
        perm(a[:index] + a[index + 1:], prev + [element], False, final)
    return final
If the amount of elements in the list is greater than 4, we should have also a possibility of adding pairs (or more elements) in a case where we can get [[1,2,4],5,None,None] and get somewhere a value like [[1,2],[4,5],None,None] or [1,[4,5,2],None,None]. An example with a significant list like 500 elements we would get somewhere a value like [[1,6,5],[5,85,5],54,3,None,8 .... (till 500)] which is one permutation compared to others. The values which are in a list inside the first (major) list does not need to be sorted in any way, the values just need to be once in the second list (minor).
Thank you in advance.
-
Use of uninitialized value $login_output in concatenation (.) or string at
in a .pl I have the following error (capture), the script is used through SPECTRUM, a network monitoring tool, and the script is used to capture the running config of a device, what could be the fault ? Thank you.
#!/opt/SPECTRUM/bin/perl -w # This script will capture the running configuration of a # Cisco SAN-OS device through an SSH session and print it to STDOUT. # # Error Codes: # 0 = Success # 255 = Usage error # 254 = Invalid timeout value # 252 = Login error # 249 = Exec prompt not found error # 244 = Error retrieving configuration # 245 = Insufficient privileges # 253 = Unexpected output # use strict; use warnings; use Net::SSH::Expect; $ENV{'PATH'} = "/usr/bin:". $ENV{'PATH'}; ### Main ### if( $#ARGV != 4 && $#ARGV != 5 ) { print "Usage: capture_running.pl <device IP> <user> <pass> <enable_pass> <login_timeout_in_seconds> <capture_timeout_in_seconds>\n"; print STDERR "Usage: capture_running.pl <deviceIP> <user> <pass> <enable_pass> <login_timeout_in_seconds> <capture_timeout_in_seconds>\n"; exit 255; } elsif( $ARGV[4] < 1 || $ARGV[4] > 600 ) { print "$ARGV[4] is the login timeout and must be an int between 1 and 600 seconds\n"; print STDERR "$ARGV[4] is the login timeout and must be an int between 1 and 600 seconds\n"; exit 254; } elsif( $#ARGV == 5 && ( $ARGV[5] < 1 || $ARGV[5] > 600 ) ) { print "$ARGV[5] is the capture timeout and must be an int between 1 and 600 seconds\n"; print STDERR "$ARGV[5] is the capture timeout and must be an int between 1 and 600 seconds\n"; exit 254; } else { my $capture_timeout = $ARGV[4]; if( $ARGV[5] ) { $capture_timeout = $ARGV[5]; } my $errorCode = 1; my @data; my $errorString = "\nHost $ARGV[0]: \n"; ($errorCode, @data) = GetConfig( $ARGV[0], $ARGV[1], $ARGV[2], $ARGV[3], $ARGV[4], $capture_timeout ); if( $errorCode == 0 ) { # Success. 
The running configuration # content is in the data variable foreach ( @data ) { print "$_\n" }; # print the configuration to STDOUT exit 0; } else { print STDERR $errorString; if( $errorCode == 245 ) { print STDERR join " ", @data, "\nEnsure that the device user has sufficient privileges to disable paging and view the config\n"; } else { print STDERR join " ", @data, "\n"; } exit $errorCode; } } exit 0; sub GetConfig { my $deviceIP=shift; my $user=shift; my $pass=shift; my $epass=shift; my $login_timeout=shift; my $capture_timeout=shift; my @config; my $msg; my $ssh = Net::SSH::Expect->new ( host => $deviceIP, user => $user, password=> $pass, raw_pty => 1, no_terminal => 0, timeout => $login_timeout, ssh_option => '-1 -c DES' ); my $login_output; eval { $login_output = $ssh->login(); }; if( $@ ) { $msg = "Login has failed. Output: $login_output"; return( 252, $msg ); } # login output should contain the right prompt characters if( $login_output !~ /\>\s*\z/ ) { $msg = "Login has failed. Didn't see device prompt as expected."; $ssh->close(); return( 252, $msg ); } if( $login_output !~ /\>\s*\z/ ) # Replace '#' is the prompt character here { # we don't have the '#' prompt, means we still can't exec commands $msg = "Exec prompt not found."; $ssh->close(); return( 249, $msg ); } my $elogin = $ssh->exec("en"); my $elogin2 = $ssh->exec($epass); if( $elogin2 !~ /\#\s*\z/ ) # Replace '#' is the prompt character here { $msg = "Exec prompt not found."; $ssh->close(); return( 249, $msg ); } # disable paging # different commands for different devices, if they don't # work then we will get messages about problems later # specifically the "No prompt after 'sh run'" error # errmsg doesn't get set when these error and if we use print # and getlines to read for errors it causes problems with print "sh run" # later. 
# $ssh->exec( "term pager 0" ); my $paging = $ssh->exec( "term pager 0" ); if ( $paging =~ /\s?%\s/ ) { $msg = "Unable to set terminal size to 0 - Insufficient privileges"; $ssh->close(); return( 245, $msg); } $ssh->send( "sh run" ); $ssh->timeout( $capture_timeout ); $ssh->peek(0); while( my $line = $ssh->read_line() ) { # get configuration content if( $line !~ /sh run|Building configuration|Current configuration|^\s*$/ ) { push @config, $line; } } if( @config <= 0 ) { $msg = "No data retrieved, the capture timeout may be too low."; $ssh->close(); return( 244, $msg ); } if( scalar grep { $_ =~ /^%/ } @config ) { # Ensure show running actually returned the config and not an error # message containing '%' return( 245, @config ); } return( 0, @config ); # everything was okay, return the captured data }
-
Why can we access the subroutine even after specifying it in EXPORT_FAIL?
Why, even after specifying subtract function in
EXPORT_FAIL
, am I able to access the function by defining it fully likemathematics::subtract
? How can we completely make the function private?package mathematics; use Exporter; @ISA = qw(Exporter); @EXPORT = qw(add); @EXPORT_FAIL = qw(subtract); sub add() { print("you can add numbers here"."\n"); } sub subtract() { print("you can subtract the numbers here"."\n"); } 1;
-
Perl unexpected behavior: croak vs. try catch
I had seen some exceptions that pointed to (end of) the catch block itself (see the example below).
In my opinion, this is unexpected behavior, since it alters the location of the original exception and makes it difficult to debug (it should say die at line 13.)
It shows the (correct) line 13, if I use die/confess or using eval instead try-catch.
Not knowing how my code will be called within the stack, I started to avoid using croak now. What do you think? Did I get right, or there is a way to improve this?
Best regards, Steve
use Carp; use Try::Tiny; try { foo(); } catch { # do something before die die $_; }; # this is line 10 sub foo { croak 'die'; # this is line 13 }
Output:
die at line 10.
-
In an Eng Compute cluster - " cannot find GOROOT directory: /usr/lib/go"
I am trying to use GO in an engineering cluster. I have a private workspace, say
/eng/name/go_proj/
; how do I set the env var and the path to point to my workspace?machine2045> pwd /eng/name/go_proj/ machine2045> env | grep GOROOT machine2045> which go /usr/company_name/bin/go machine2045> go env GOROOT go: cannot find GOROOT directory: /usr/local/go machine2045> go get -u github.com/go-delve/delve/cmd/dlv go: cannot find GOROOT directory: /usr/local/go
I do not want to modify my
.bashrc
if possible since that is not recommended. -
Self-signed certificate for applications on localhost - Socket TCP (TLS)
I have a question regarding self-signed certificates.
I am developing a server with 2 different applications (one developed in go and the other in c #), each one has its own functionality but both connect with each other with a byte communication through TCP sockets with TLS. My question is the following. Being a communication on localhost, can I use a self-signed certificate to encrypt the communication (TLS) or can it cause a vulnerability?
I have read that it is not recommended to use a self-signed certificate but I think that since it is a communication within the local machine itself, there may not be any problem in using it.
-
How to Unit Test a Lambda Implemented with AWS SDK for Go V2
Given the following simple lambda written in Go that just returns a table description...
package main import ( "context" "encoding/json" "fmt" "log" "net/http" "os" "strings" "github.com/aws/aws-lambda-go/events" "github.com/aws/aws-lambda-go/lambda" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/dynamodb" "go.uber.org/zap" ) var ( dynamoDBTableName = aws.String(os.Getenv(EnvDynamoDBTableName)) logger = func() *zap.Logger { l, err := zap.NewProduction() if err != nil { log.Printf("failed to create zap logger: %v", err) } return l }() ) func handler(ctx context.Context, req events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { defer logger.Sync() resp := events.APIGatewayProxyResponse{} cfg, err := config.LoadDefaultConfig(ctx) if err != nil { logger.Error("failed to load AWS config", zap.Error(err)) return resp, fmt.Errorf("failed to load AWS config: %w", err) } svc := dynamodb.NewFromConfig(cfg) // fake logic t, err := svc.DescribeTable(ctx, &dynamodb.DescribeTableInput{TableName: dynamoDBTableName}) if err != nil { logger.Error("failed to describe table", zap.String("table-name", *dynamoDBTableName), zap.Error(err)) } var sb strings.Builder enc := json.NewEncoder(&sb) err = enc.Encode(t.Table) if err != nil { logger.Error("failed to JSON encode response", zap.Error(err)) } resp.Body = sb.String() resp.StatusCode = http.StatusOK return resp, nil } func main() { lambda.Start(handler) }
... how do I unit test it locally? With the old SDK it was possible to use dependency injection like this:
type deps struct svc dynamodbiface.DynamoDBAPI table string } func (d *deps) handler(ctx context.Context, req events.APIGatewayProxyRequest) (events.APIGatewayProxyResponse, error) { ... } func main() { s := session.Must(session.NewSession()) d := deps { svc: dynamodb.New(s), table: dynamoDBTableName, } lambda.Start(d.handler) }
How do I test a lambda written with the new AWS SDK for Go V2 given that I need the context to load the config required by
dynamodb.NewFromConfig
?